relaton-ogc 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.rubocop.yml +10 -0
- data/.travis.yml +18 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +84 -0
- data/LICENSE.txt +21 -0
- data/README.adoc +117 -0
- data/Rakefile +6 -0
- data/appveyor.yml +37 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/relaton_ogc.rb +14 -0
- data/lib/relaton_ogc/data/bibliography.json +1 -0
- data/lib/relaton_ogc/data/etag.txt +1 -0
- data/lib/relaton_ogc/editorial_group.rb +66 -0
- data/lib/relaton_ogc/hash_converter.rb +20 -0
- data/lib/relaton_ogc/hit.rb +12 -0
- data/lib/relaton_ogc/hit_collection.rb +78 -0
- data/lib/relaton_ogc/ogc_bibliographic_item.rb +11 -0
- data/lib/relaton_ogc/ogc_bibliography.rb +77 -0
- data/lib/relaton_ogc/processor.rb +33 -0
- data/lib/relaton_ogc/scrapper.rb +135 -0
- data/lib/relaton_ogc/version.rb +3 -0
- data/lib/relaton_ogc/xml_parser.rb +33 -0
- data/relaton_ogc.gemspec +39 -0
- metadata +226 -0
@@ -0,0 +1,20 @@
|
|
1
|
+
module RelatonOgc
  # Hash conversion helpers specialised for OGC documents.
  class HashConverter < RelatonIsoBib::HashConverter
    class << self
      private

      # Swaps the raw +:editorialgroup+ hash stored in +ret+ for an
      # EditorialGroup instance built from its +:committee+, +:subcommittee+,
      # +:workgroup+ and +:secretariat+ entries. Leaves +ret+ untouched when
      # it carries no editorial group.
      #
      # @param ret [Hash] hash being converted; modified in place
      def editorialgroup_hash_to_bib(ret)
        group = ret[:editorialgroup]
        return unless group

        keys = %i[committee subcommittee workgroup secretariat]
        attrs = keys.map { |key| [key, group[key]] }.to_h
        ret[:editorialgroup] = EditorialGroup.new(**attrs)
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "faraday"
|
2
|
+
require "relaton_ogc/hit"
|
3
|
+
|
4
|
+
module RelatonOgc
  # Collection of hits found in the OGC bibliography for a reference.
  #
  # The bibliography is a JSON document downloaded from ENDPOINT and cached
  # in DATAFILE. The ETag of the last download is kept in ETAGFILE and sent
  # back as "If-None-Match" so unchanged data is not saved again.
  class HitCollection < RelatonBib::HitCollection
    ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
      "NamingAuthority/master/incubation/bibliography/bibliography.json".freeze
    DATAFILE = File.expand_path "data/bibliography.json", __dir__
    ETAGFILE = File.expand_path "data/etag.txt", __dir__

    # Builds the collection with the matching hits ordered newest first.
    #
    # @param ref [String] document reference, optionally prefixed by "OGC "
    # @param year [String, nil]
    # @param _opts [Hash] accepted for interface compatibility, not used
    def initialize(ref, year = nil, _opts = {})
      @text = ref
      @year = year
      @fetched = false
      hits = from_json(ref).sort_by do |hit|
        # Records without a date sort as the oldest ones.
        hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
      end
      concat hits.reverse
    end

    private

    #
    # Selects records from the JSON data whose identifier contains the
    # reference. Records of type "CC" are skipped.
    #
    # @param docid [String]
    # @return [Array<RelatonOgc::Hit>]
    def from_json(docid, **_opts)
      ref = docid.sub(/^OGC\s/, "")
      data.select do |_k, doc|
        # to_s: a record without an identifier simply does not match.
        doc["type"] != "CC" && doc["identifier"].to_s.include?(ref)
      end.map { |_k, h| Hit.new(h, self) }
    end

    #
    # Returns the bibliography data. The server is asked for a fresh copy
    # when the cache file is missing or was last changed before today.
    #
    # @return [Hash]
    def data
      ctime = File.ctime DATAFILE if File.exist? DATAFILE
      fetch_data if !ctime || ctime.to_date < Date.today
      @data ||= JSON.parse File.read(DATAFILE, encoding: "UTF-8")
    end

    #
    # Fetches data from the server and saves it to the cache file.
    #
    def fetch_data
      resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
      # return if there aren't any changes since last fetching
      return unless resp.status == 200

      self.etag = resp[:etag]
      @data = JSON.parse resp.body
      File.write DATAFILE, @data.to_json, encoding: "UTF-8"
    end

    #
    # Reads the ETag from file (memoized).
    #
    # @return [String, NilClass]
    def etag
      @etag ||= if File.exist? ETAGFILE
                  File.read ETAGFILE, encoding: "UTF-8"
                end
    end

    #
    # Saves the ETag to file and refreshes the memoized value so that a
    # later read does not return the previous tag.
    #
    # @param e_tag [String]
    def etag=(e_tag)
      File.write ETAGFILE, e_tag, encoding: "UTF-8"
      @etag = e_tag
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module RelatonOgc
  # Bibliographic item for OGC documents.
  class OgcBibliographicItem < RelatonIsoBib::IsoBibliographicItem
    # Document type names declared for OGC items.
    TYPES = [
      "standard",
      "standard-with-suite",
      "abstract-specification",
      "best-practice",
      "candidate-standard",
      "conformance-class",
      "change-request",
      "community-standard",
      "discussion-paper",
      "draft-discussion-paper",
      "interoperability-program-report",
      "implementation-standard",
      "public-engineering-report",
    ].freeze
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module RelatonOgc
  # Entry point for looking up documents in the OGC bibliography.
  class OgcBibliography
    class << self
      # Searches the OGC bibliography for a reference.
      #
      # @param text [String] reference to look for
      # @param year [String, nil]
      # @param opts [Hash]
      # @return [RelatonOgc::HitCollection]
      # @raise [RelatonBib::RequestError] when the connection fails
      def search(text, year = nil, opts = {})
        HitCollection.new text, year, opts
      rescue Faraday::ConnectionFailed
        # Report the endpoint that is actually queried.
        raise RelatonBib::RequestError,
              "Could not access #{HitCollection::ENDPOINT}"
      end

      # @param code [String] the OGC standard Code to look up
      # @param year [String] the year the standard was published (optional)
      #
      # @param opts [Hash] options, passed through to the search
      #
      # @return [Object, nil] the result of the first matching hit's #fetch
      #   (Processor#get documents it as RelatonOgc::OgcBibliographicItem),
      #   or nil when nothing matches
      def get(code, year = nil, opts = {})
        result = bib_search_filter(code, year, opts)
        return unless result

        ret = bib_results_filter(result, year)
        ret[:ret] || fetch_ref_err(code, year, ret[:years])
      end

      private

      # Announces the lookup on stderr and runs the search.
      #
      # @param code [String]
      # @param year [String, nil]
      # @param opts [Hash]
      # @return [RelatonOgc::HitCollection]
      def bib_search_filter(code, year, opts)
        warn "fetching #{code}..."
        search(code, year, opts)
      end

      # Goes through the search results and returns the first fetched item.
      # When a year is given, the item must have a "published" date in that
      # year. If there is no match, returns the years of the published dates
      # that caused the mismatch, for error reporting.
      #
      # @param result [Enumerable] hits responding to #fetch
      # @param year [String, nil]
      #
      # @return [Hash] +{ ret: item }+ on success, +{ years: [...] }+ otherwise
      def bib_results_filter(result, year)
        missed_years = []
        result.each do |hit|
          item = hit.fetch
          return { ret: item } unless year

          item.date.select { |d| d.type == "published" }.each do |d|
            return { ret: item } if year.to_i == d.on.year

            missed_years << d.on.year
          end
        end
        { years: missed_years }
      end

      # Warns on stderr that the reference was not found.
      #
      # @param code [String]
      # @param year [String, nil]
      # @param missed_years [Array] years found instead of the requested one
      # @return [nil]
      def fetch_ref_err(code, year, missed_years)
        id = year ? "#{code} year #{year}" : code
        warn "WARNING: no match found online for #{id}. "\
          "The code must be exactly like it is on the standards website."
        unless missed_years.empty?
          warn "(There was no match for #{year}, though there were matches "\
            "found for #{missed_years.join(', ')}.)"
        end
        nil
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require "relaton/processor"
|
2
|
+
|
3
|
+
module RelatonOgc
  # Relaton processor for the OGC flavour. It delegates lookups and
  # conversions to the RelatonOgc classes.
  class Processor < Relaton::Processor
    # Sets the identifiers describing this flavour.
    def initialize
      @idtype = "OGC"
      @prefix = "OGC"
      @short = :relaton_ogc
      @defaultprefix = /^OGC\s/
    end

    # Looks a document up in the OGC bibliography.
    #
    # @param code [String]
    # @param date [String, NilClass] year
    # @param opts [Hash]
    # @return [RelatonOgc::OgcBibliographicItem]
    def get(code, date = nil, opts = {})
      ::RelatonOgc::OgcBibliography.get(code, date, opts)
    end

    # Builds an item from Relaton XML.
    #
    # @param xml [String]
    # @return [RelatonOgc::OgcBibliographicItem]
    def from_xml(xml)
      ::RelatonOgc::XMLParser.from_xml(xml)
    end

    # Builds an item from a hash.
    #
    # @param hash [Hash]
    # @return [RelatonOgc::OgcBibliographicItem]
    def hash_to_bib(hash)
      attrs = ::RelatonOgc::HashConverter.hash_to_bib(hash)
      ::RelatonOgc::OgcBibliographicItem.new(attrs)
    end
  end
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
module RelatonOgc
  # Builds OgcBibliographicItem objects from records of the OGC bibliography.
  module Scrapper
    # Type codes used in the bibliography records, mapped to their names.
    TYPES = {
      "AS" => "Abstract Specification",
      "BP" => "Best Practice",
      "CAN" => "Candidate Standard",
      "CC" => "Conformance Class",
      "CR" => "Change Request",
      "CS" => "Community Standard",
      "DP" => "Discussion Paper",
      "DP-Draft" => "Draft Discussion Paper",
      "IPR" => "Interoperability Program Report - Engineering Specification",
      "IS" => "Implementation Standard",
      "ISC" => "Implementation Standard Corrigendum",
      "ISx" => "Extension Package Standard",
      "Notes" => "Notes",
      "ORM" => "OGC Reference Model",
      "PC" => "Profile Corrigendum",
      "PER" => "Public Engineering Report",
      "POL" => "Policy",
      "POL-NTS" => "Policy - Name Type Specification",
      "Primer" => "Primer",
      "Profile" => "Profile",
      "RFC" => "Request for Comment",
      "Retired" => "Retired document",
      "SAP" => "Standard Application Profile",
      "TS" => "Test Suite",
      "WhitePaper" => "Whitepaper",
    }.freeze

    class << self
      # Converts one bibliography record into a bibliographic item.
      #
      # @param hit [Hash] record with "title", "identifier", "URL", "type",
      #   "description", "date" and optionally "creator" / "publisher" keys
      # @return [RelatonOgc::OgcBibliographicItem]
      def parse_page(hit)
        OgcBibliographicItem.new(
          title: fetch_title(hit["title"]),
          docid: fetch_docid(hit["identifier"]),
          link: fetch_link(hit["URL"]),
          type: fetch_type(hit["type"]),
          edition: fetch_edition(hit["identifier"]),
          abstract: fetch_abstract(hit["description"]),
          contributor: fetch_contributor(hit),
          language: ["en"],
          script: ["Latn"],
          date: fetch_date(hit["date"]),
        )
      end

      private

      # @param title [String]
      # @return [Array<RelatonIsoBib::TypedTitleString>] the same title typed
      #   as "title-main" and as "main"
      def fetch_title(title)
        [
          RelatonIsoBib::TypedTitleString.new(
            type: "title-main", content: title, language: "en", script: "Latn",
            format: "text/plain"
          ),
          RelatonIsoBib::TypedTitleString.new(
            type: "main", content: title, language: "en", script: "Latn",
            format: "text/plain"
          ),
        ]
      end

      # @param identifier [String]
      # @return [Array<RelatonBib::DocumentIdentifier>]
      def fetch_docid(identifier)
        [RelatonBib::DocumentIdentifier.new(id: identifier, type: "OGC")]
      end

      # @param url [String]
      # @return [Array<RelatonBib::TypedUri>]
      def fetch_link(url)
        [RelatonBib::TypedUri.new(type: "obp", content: url)]
      end

      # Maps a record type code to a lower-case, dash-separated type name.
      # A leading "D-" is dropped from the code before the lookup.
      #
      # @param type [String, nil]
      # @return [String, nil] nil when the code is missing or not in TYPES
      def fetch_type(type)
        # An unknown code yields nil rather than a NoMethodError.
        TYPES[type.to_s.sub(/^D-/, "")]&.downcase&.gsub(" ", "-")
      end

      # Extracts the digits that follow an "r" at the end of the identifier.
      #
      # @param identifier [String]
      # @return [String, nil] nil when the identifier has no such suffix
      def fetch_edition(identifier)
        %r{(?<=r)(?<edition>\d+)$} =~ identifier
        edition
      end

      # @param description [String]
      # @return [Array<RelatonBib::FormattedString>]
      def fetch_abstract(description)
        [RelatonBib::FormattedString.new(content: description, language: "en",
                                         script: "Latn")]
      end

      # Builds author contributions from the comma-separated "creator" entry
      # and, when present, a publisher contribution from "publisher".
      #
      # @param doc [Hash]
      # @return [Array<RelatonBib::ContributionInfo>] always an Array, empty
      #   when the record has neither creators nor a publisher
      def fetch_contributor(doc)
        contribs = doc["creator"].to_s.split(", ").map do |name|
          personn_contrib name
        end
        contribs << org_contrib(doc["publisher"]) if doc["publisher"]
        # Return the list explicitly: the modifier-if above evaluates to nil
        # when there is no publisher.
        contribs
      end

      # @param name [String]
      # @return [RelatonBib::ContributionInfo] contribution with role "author"
      def personn_contrib(name)
        fname = RelatonBib::FullName.new(
          completename: RelatonBib::LocalizedString.new(name),
        )
        entity = RelatonBib::Person.new(name: fname)
        RelatonBib::ContributionInfo.new(
          entity: entity, role: [type: "author"],
        )
      end

      # @param name [String]
      # @return [RelatonBib::ContributionInfo] contribution with role
      #   "publisher"
      def org_contrib(name)
        entity = RelatonBib::Organization.new(name: name)
        RelatonBib::ContributionInfo.new(
          entity: entity, role: [type: "publisher"],
        )
      end

      # @param date [String, nil]
      # @return [Array<RelatonBib::BibliographicDate>] a single "published"
      #   date, or an empty list when the record has no date
      def fetch_date(date)
        return [] unless date

        [RelatonBib::BibliographicDate.new(type: "published", on: date)]
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
module RelatonOgc
  # Parses Relaton XML into OGC bibliographic items.
  class XMLParser < RelatonIsoBib::XMLParser
    class << self
      # Override of the RelatonIsoBib::XMLParser.from_xml method. Builds the
      # item from the root +bibitem+ or +bibdata+ element.
      #
      # @param xml [String]
      # @return [RelatonOgc::OgcBibliographicItem]
      def from_xml(xml)
        root = Nokogiri::XML(xml).at "/bibitem|/bibdata"
        OgcBibliographicItem.new item_data(root)
      end

      private

      # Reads the committee, subcommittee and workgroup of the
      # +editorialgroup+ child element.
      #
      # @TODO Organization isn't recreated
      # @param ext [Nokogiri::XML::Element]
      # @return [EditorialGroup, nil] nil when there is no editorialgroup
      def fetch_editorialgroup(ext)
        group = ext.at("./editorialgroup")
        return unless group

        EditorialGroup.new(
          committee: group.at("committee")&.text,
          subcommittee: iso_subgroup(group.at("subcommittee")),
          workgroup: iso_subgroup(group.at("workgroup")),
        )
      end
    end
  end
end
|
data/relaton_ogc.gemspec
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
lib = File.expand_path("lib", __dir__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
require "relaton_ogc/version"
|
4
|
+
|
5
|
+
# Gem specification for relaton-ogc.
Gem::Specification.new do |spec|
  spec.name          = "relaton-ogc"
  spec.version       = RelatonOgc::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  spec.summary       = "RelatonOgc: retrieve OGC Standards for bibliographic "\
                       "use using the OgcBibliographicItem model"
  # The description names this gem (OGC), in line with the summary.
  spec.description   = "RelatonOgc: retrieve OGC Standards for bibliographic "\
                       "use using the OgcBibliographicItem model"
  spec.homepage      = "https://github.com/relaton/relaton-ogc"
  spec.license       = "BSD-2-Clause"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "debase"
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "ruby-debug-ide"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "vcr"
  spec.add_development_dependency "webmock"

  spec.add_dependency "faraday"
  spec.add_dependency "relaton-iso-bib", "~> 0.3.0"
end
|