relaton-ogc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.rubocop.yml +10 -0
- data/.travis.yml +18 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +84 -0
- data/LICENSE.txt +21 -0
- data/README.adoc +117 -0
- data/Rakefile +6 -0
- data/appveyor.yml +37 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/relaton_ogc.rb +14 -0
- data/lib/relaton_ogc/data/bibliography.json +1 -0
- data/lib/relaton_ogc/data/etag.txt +1 -0
- data/lib/relaton_ogc/editorial_group.rb +66 -0
- data/lib/relaton_ogc/hash_converter.rb +20 -0
- data/lib/relaton_ogc/hit.rb +12 -0
- data/lib/relaton_ogc/hit_collection.rb +78 -0
- data/lib/relaton_ogc/ogc_bibliographic_item.rb +11 -0
- data/lib/relaton_ogc/ogc_bibliography.rb +77 -0
- data/lib/relaton_ogc/processor.rb +33 -0
- data/lib/relaton_ogc/scrapper.rb +135 -0
- data/lib/relaton_ogc/version.rb +3 -0
- data/lib/relaton_ogc/xml_parser.rb +33 -0
- data/relaton_ogc.gemspec +39 -0
- metadata +226 -0
@@ -0,0 +1,20 @@
|
|
1
|
+
module RelatonOgc
  # Hash converter for OGC bibliographic items. Extends
  # RelatonIsoBib::HashConverter and overrides the editorial group conversion.
  class HashConverter < RelatonIsoBib::HashConverter
    class << self
      private

      # Replaces the raw +:editorialgroup+ entry of +ret+ with an
      # EditorialGroup instance built from its :committee, :subcommittee,
      # :workgroup and :secretariat values. Leaves +ret+ untouched when the
      # entry is missing.
      #
      # @param ret [Hash] item attributes, modified in place
      def editorialgroup_hash_to_bib(ret)
        group = ret[:editorialgroup]
        return unless group

        keys = %i[committee subcommittee workgroup secretariat]
        attrs = keys.each_with_object({}) { |key, acc| acc[key] = group[key] }
        ret[:editorialgroup] = EditorialGroup.new(**attrs)
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "faraday"
|
2
|
+
require "relaton_ogc/hit"
|
3
|
+
|
4
|
+
module RelatonOgc
  # Collection of hits looked up in the OGC bibliography JSON document.
  # The document is downloaded from ENDPOINT and cached in DATAFILE; the
  # ETag of the last download is kept in ETAGFILE.
  class HitCollection < RelatonBib::HitCollection
    ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
      "NamingAuthority/master/incubation/bibliography/bibliography.json".freeze
    DATAFILE = File.expand_path "data/bibliography.json", __dir__
    ETAGFILE = File.expand_path "data/etag.txt", __dir__

    # Collects the hits matching +ref+, ordered from the newest "date" to the
    # oldest. Entries without a "date" are sorted using Date.new.
    #
    # @param ref [String] document reference
    # @param year [String, nil]
    # @param _opts [Hash] accepted for interface compatibility, unused
    def initialize(ref, year = nil, _opts = {})
      @text = ref
      @year = year
      @fetched = false
      hits = from_json(ref).sort_by do |hit|
        hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
      end
      concat hits.reverse
    end

    private

    #
    # Select matching entries from the JSON data.
    #
    # An entry matches when its "type" is not "CC" and its "identifier"
    # contains the reference with a leading "OGC " removed. Entries without
    # an "identifier" never match (previously they raised NoMethodError).
    #
    # @param docid [String]
    # @return [Array<RelatonOgc::Hit>]
    def from_json(docid, **_opts)
      ref = docid.sub(/^OGC\s/, "")
      matched = data.select do |_key, doc|
        doc["type"] != "CC" && doc["identifier"].to_s.include?(ref)
      end
      matched.map { |_key, doc| Hit.new(doc, self) }
    end

    #
    # Returns the bibliography data, refreshing the cached file first when it
    # is missing or its ctime is earlier than today.
    #
    # @return [Hash]
    def data
      ctime = File.ctime DATAFILE if File.exist? DATAFILE
      fetch_data if !ctime || ctime.to_date < Date.today
      # fetch_data sets @data only when new content was downloaded;
      # otherwise fall back to the cached file.
      @data ||= JSON.parse File.read(DATAFILE, encoding: "UTF-8")
    end

    #
    # Fetch data from the server and save it to DATAFILE.
    #
    def fetch_data
      resp = Faraday.new(ENDPOINT, headers: { "If-None-Match" => etag }).get
      # return if there aren't any changes since last fetching
      return unless resp.status == 200

      self.etag = resp[:etag]
      @data = JSON.parse resp.body
      File.write DATAFILE, @data.to_json, encoding: "UTF-8"
    end

    #
    # Read the ETag from ETAGFILE (memoized).
    #
    # @return [String, NilClass] nil when the file does not exist
    def etag
      @etag ||= if File.exist? ETAGFILE
                  File.read ETAGFILE, encoding: "UTF-8"
                end
    end

    #
    # Save the ETag to ETAGFILE and refresh the memoized value so that a
    # later call to #etag does not return the previous tag.
    #
    # @param e_tag [String]
    def etag=(e_tag)
      @etag = e_tag
      File.write ETAGFILE, e_tag, encoding: "UTF-8"
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module RelatonOgc
  # Bibliographic item for OGC documents; a RelatonIsoBib::IsoBibliographicItem
  # with its own list of document types.
  class OgcBibliographicItem < RelatonIsoBib::IsoBibliographicItem
    # Document type names declared for this item class.
    TYPES = [
      "standard",
      "standard-with-suite",
      "abstract-specification",
      "best-practice",
      "candidate-standard",
      "conformance-class",
      "change-request",
      "community-standard",
      "discussion-paper",
      "draft-discussion-paper",
      "interoperability-program-report",
      "implementation-standard",
      "public-engineering-report",
    ].freeze
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module RelatonOgc
  # Entry point for looking up OGC documents by reference.
  class OgcBibliography
    class << self
      # Builds the collection of hits for a reference.
      #
      # @param text [String] document reference
      # @param year [String, nil]
      # @param opts [Hash]
      # @return [RelatonOgc::HitCollection]
      # @raise [RelatonBib::RequestError] when the connection fails
      def search(text, year = nil, opts = {})
        HitCollection.new text, year, opts
      # Faraday::ConnectionFailed is the current name of this error class;
      # the Faraday::Error::ConnectionFailed alias no longer exists in
      # Faraday 1.x.
      rescue Faraday::ConnectionFailed
        raise RelatonBib::RequestError,
              "Could not access #{HitCollection::ENDPOINT}"
      end

      # @param code [String] the OGC standard code to look up
      # @param year [String] the year the standard was published (optional)
      #
      # @param opts [Hash] options, passed through to the search
      # @option opts [TrueClass, FalseClass] :all_parts restricted to all parts
      #   if all-parts reference is required
      # @option opts [TrueClass, FalseClass] :bibdata
      #
      # @return [Object, nil] the item returned by the first matching hit's
      #   #fetch, or nil when nothing matches
      def get(code, year = nil, opts = {})
        result = bib_search_filter(code, year, opts) || (return nil)
        ret = bib_results_filter(result, year)
        return ret[:ret] if ret[:ret]

        fetch_ref_err(code, year, ret[:years])
      end

      private

      # Announces the lookup on stderr and runs the search.
      #
      # @param code [String]
      # @param year [String, nil]
      # @param opts [Hash]
      # @return [RelatonOgc::HitCollection]
      def bib_search_filter(code, year, opts)
        warn "fetching #{code}..."
        search(code, year, opts)
      end

      # Walks through the hits in order and returns the first fetched item
      # that is acceptable: any item when no year is requested, otherwise the
      # first item having a "published" date in the requested year.
      # If no item matches, returns the published years that caused the
      # mismatch, for error reporting.
      #
      # @param result [Enumerable] hits responding to #fetch
      # @param year [String, nil]
      #
      # @return [Hash] +{ ret: item }+ on success, +{ years: [...] }+ otherwise
      def bib_results_filter(result, year)
        missed_years = []
        result.each do |r|
          item = r.fetch
          return { ret: item } unless year

          item.date.select { |d| d.type == "published" }.each do |d|
            return { ret: item } if year.to_i == d.on.year

            missed_years << d.on.year
          end
        end
        { years: missed_years }
      end

      # Reports on stderr that nothing matched the reference.
      #
      # @param code [String]
      # @param year [String, nil]
      # @param missed_years [Array<Integer>] published years that did not match
      # @return [nil]
      def fetch_ref_err(code, year, missed_years)
        id = year ? "#{code} year #{year}" : code
        warn "WARNING: no match found online for #{id}. "\
          "The code must be exactly like it is on the standards website."
        unless missed_years.empty?
          warn "(There was no match for #{year}, though there were matches "\
            "found for #{missed_years.join(', ')}.)"
        end
        nil
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require "relaton/processor"
|
2
|
+
|
3
|
+
module RelatonOgc
  # Relaton processor for the OGC flavour: holds the identifying settings
  # and delegates lookups and conversions to the RelatonOgc classes.
  class Processor < Relaton::Processor
    # Sets the short name, prefix, default-prefix pattern and id type.
    def initialize
      @idtype = "OGC"
      @defaultprefix = /^OGC\s/
      @prefix = "OGC"
      @short = :relaton_ogc
    end

    # Looks up a document by reference.
    #
    # @param code [String]
    # @param date [String, NilClass] year
    # @param opts [Hash]
    # @return [RelatonOgc::OgcBibliographicItem]
    def get(code, date = nil, opts = {})
      ::RelatonOgc::OgcBibliography.get(code, date, opts)
    end

    # Builds an item from its XML serialisation.
    #
    # @param xml [String]
    # @return [RelatonOgc::OgcBibliographicItem]
    def from_xml(xml)
      ::RelatonOgc::XMLParser.from_xml(xml)
    end

    # Builds an item from a hash.
    #
    # @param hash [Hash]
    # @return [RelatonOgc::OgcBibliographicItem]
    def hash_to_bib(hash)
      ::RelatonOgc::OgcBibliographicItem.new(
        ::RelatonOgc::HashConverter.hash_to_bib(hash),
      )
    end
  end
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
module RelatonOgc
  # Builds OgcBibliographicItem objects from entries of the OGC bibliography
  # JSON document.
  module Scrapper
    # Maps the type codes found in the "type" field to their full names.
    TYPES = {
      "AS" => "Abstract Specification",
      "BP" => "Best Practice",
      "CAN" => "Candidate Standard",
      "CC" => "Conformance Class",
      "CR" => "Change Request",
      "CS" => "Community Standard",
      "DP" => "Discussion Paper",
      "DP-Draft" => "Draft Discussion Paper",
      "IPR" => "Interoperability Program Report - Engineering Specification",
      "IS" => "Implementation Standard",
      "ISC" => "Implementation Standard Corrigendum",
      "ISx" => "Extension Package Standard",
      "Notes" => "Notes",
      "ORM" => "OGC Reference Model",
      "PC" => "Profile Corrigendum",
      "PER" => "Public Engineering Report",
      "POL" => "Policy",
      "POL-NTS" => "Policy - Name Type Specification",
      "Primer" => "Primer",
      "Profile" => "Profile",
      "RFC" => "Request for Comment",
      "Retired" => "Retired document",
      "SAP" => "Standard Application Profile",
      "TS" => "Test Suite",
      "WhitePaper" => "Whitepaper",
    }.freeze

    class << self
      # Converts one bibliography entry into a bibliographic item.
      #
      # @param hit [Hash] entry with the keys "title", "identifier", "URL",
      #   "type", "description", "creator", "publisher" and "date"
      # @return [RelatonOgc::OgcBibliographicItem]
      def parse_page(hit)
        OgcBibliographicItem.new(
          title: fetch_title(hit["title"]),
          docid: fetch_docid(hit["identifier"]),
          link: fetch_link(hit["URL"]),
          type: fetch_type(hit["type"]),
          edition: fetch_edition(hit["identifier"]),
          abstract: fetch_abstract(hit["description"]),
          contributor: fetch_contributor(hit),
          language: ["en"],
          script: ["Latn"],
          date: fetch_date(hit["date"]),
        )
      end

      private

      # Wraps the title in two typed title strings ("title-main" and "main").
      #
      # @param title [String]
      # @return [Array<RelatonIsoBib::TypedTitleString>]
      def fetch_title(title)
        [
          RelatonIsoBib::TypedTitleString.new(
            type: "title-main", content: title, language: "en", script: "Latn",
            format: "text/plain"
          ),
          RelatonIsoBib::TypedTitleString.new(
            type: "main", content: title, language: "en", script: "Latn",
            format: "text/plain"
          ),
        ]
      end

      # @param identifier [String]
      # @return [Array<RelatonBib::DocumentIdentifier>]
      def fetch_docid(identifier)
        [RelatonBib::DocumentIdentifier.new(id: identifier, type: "OGC")]
      end

      # @param url [String]
      # @return [Array<RelatonBib::TypedUri>]
      def fetch_link(url)
        [RelatonBib::TypedUri.new(type: "obp", content: url)]
      end

      # Translates a type code into a lower-case, dash-separated type name.
      # A leading "D-" on the code is ignored.
      #
      # @param type [String, nil]
      # @return [String, nil] nil when the code is missing or not in TYPES
      #   (previously this raised NoMethodError)
      def fetch_type(type)
        TYPES[type.to_s.sub(/^D-/, "")]&.downcase&.gsub(" ", "-")
      end

      # Extracts the digits following a trailing "r" of the identifier.
      #
      # @param identifier [String]
      # @return [String, nil] nil when the identifier has no such suffix
      def fetch_edition(identifier)
        # A regexp literal with a named group on the left of =~ assigns the
        # capture to a local variable of the same name.
        %r{(?<=r)(?<edition>\d+)$} =~ identifier
        edition
      end

      # @param description [String]
      # @return [Array<RelatonBib::FormattedString>]
      def fetch_abstract(description)
        [RelatonBib::FormattedString.new(content: description, language: "en",
                                         script: "Latn")]
      end

      # Builds one author per comma-separated name in "creator", plus the
      # publisher organization when "publisher" is present.
      #
      # @param doc [Hash]
      # @return [Array<RelatonBib::ContributionInfo>] always an array; empty
      #   when the entry has neither creators nor a publisher
      def fetch_contributor(doc)
        contribs = doc["creator"].to_s.split(", ").map do |name|
          personn_contrib name
        end
        contribs << org_contrib(doc["publisher"]) if doc["publisher"]
        # Return the list explicitly: the modifier-if above evaluates to nil
        # when there is no publisher.
        contribs
      end

      # @param name [String]
      # @return [RelatonBib::ContributionInfo]
      def personn_contrib(name)
        fname = RelatonBib::FullName.new(
          completename: RelatonBib::LocalizedString.new(name),
        )
        entity = RelatonBib::Person.new(name: fname)
        RelatonBib::ContributionInfo.new(
          entity: entity, role: [type: "author"],
        )
      end

      # @param name [String]
      # @return [RelatonBib::ContributionInfo]
      def org_contrib(name)
        entity = RelatonBib::Organization.new(name: name)
        RelatonBib::ContributionInfo.new(
          entity: entity, role: [type: "publisher"],
        )
      end

      # @param date [String, nil]
      # @return [Array<RelatonBib::BibliographicDate>] empty when the entry
      #   has no date
      def fetch_date(date)
        return [] unless date

        [RelatonBib::BibliographicDate.new(type: "published", on: date)]
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
|
3
|
+
module RelatonOgc
  # XML parser producing OgcBibliographicItem objects. Relies on helpers
  # (item_data, iso_subgroup) that are not defined in this class; presumably
  # inherited from RelatonIsoBib::XMLParser.
  class XMLParser < RelatonIsoBib::XMLParser
    class << self
      # Override RelatonIsoBib::XMLParser.from_xml method.
      #
      # @param xml [String]
      # @return [RelatonOgc::OgcBibliographicItem, nil] nil (with a warning
      #   on stderr) when the document has no root bibitem or bibdata element
      def from_xml(xml)
        doc = Nokogiri::XML(xml)
        isoitem = doc.at "/bibitem|/bibdata"
        unless isoitem
          warn "[relaton-ogc] can't find bibitem or bibdata element in the XML"
          return
        end

        OgcBibliographicItem.new item_data(isoitem)
      end

      private

      # Builds the editorial group from the ./editorialgroup child of +ext+.
      #
      # @TODO Organization isn't recreated
      # @param ext [Nokogiri::XML::Element]
      # @return [EditorialGroup, nil] nil when there is no editorialgroup
      #   element; the constant is resolved from within RelatonOgc
      def fetch_editorialgroup(ext)
        eg = ext.at("./editorialgroup")
        return unless eg

        committee = eg.at("committee")&.text
        sc = iso_subgroup eg.at("subcommittee")
        wg = iso_subgroup eg.at("workgroup")
        EditorialGroup.new(
          committee: committee, subcommittee: sc, workgroup: wg,
        )
      end
    end
  end
end
|
data/relaton_ogc.gemspec
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
lib = File.expand_path("lib", __dir__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
require "relaton_ogc/version"
|
4
|
+
|
5
|
+
# Gem specification for relaton-ogc.
Gem::Specification.new do |spec|
  spec.name          = "relaton-ogc"
  spec.version       = RelatonOgc::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  spec.summary       = "RelatonOgc: retrieve OGC Standards for bibliographic "\
                       "use using the OgcBibliographicItem model"
  # The description previously named RelatonIso / ISO / IsoBibliographicItem.
  spec.description   = "RelatonOgc: retrieve OGC Standards for bibliographic "\
                       "use using the OgcBibliographicItem model"
  spec.homepage      = "https://github.com/relaton/relaton-ogc"
  spec.license       = "BSD-2-Clause"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "debase"
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "ruby-debug-ide"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "vcr"
  spec.add_development_dependency "webmock"

  spec.add_dependency "faraday"
  spec.add_dependency "relaton-iso-bib", "~> 0.3.0"
end
|