relaton-ogc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
module RelatonOgc
  # Hash-to-bibliographic-item conversion helpers for OGC documents.
  class HashConverter < RelatonIsoBib::HashConverter
    class << self
      private

      # Replaces the raw +:editorialgroup+ entry of +ret+ with an
      # EditorialGroup built from its committee, subcommittee, workgroup and
      # secretariat values. Does nothing when the entry is absent.
      #
      # @param ret [Hash] hash being converted; modified in place
      # @return [EditorialGroup, nil] the new group, or nil when there was no
      #   editorial group entry
      def editorialgroup_hash_to_bib(ret)
        group = ret[:editorialgroup]
        if group
          attrs = %i[committee subcommittee workgroup secretariat].map do |key|
            [key, group[key]]
          end.to_h
          ret[:editorialgroup] = EditorialGroup.new(**attrs)
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module RelatonOgc
  # One entry of a search result.
  class Hit < RelatonBib::Hit
    # NOTE(review): presumably the collection this hit was created for
    # (HitCollection passes itself to Hit.new) - confirm against RelatonBib::Hit.
    # @return [RelatonOgc::HitCollection]
    attr_reader :hit_collection

    # Converts the raw hit into a bibliographic item. The item is built on
    # the first call and reused afterwards.
    #
    # @return [RelatonOgc::OgcBibliographicItem]
    def fetch
      return @fetch if @fetch

      @fetch = Scrapper.parse_page(@hit)
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require "faraday"
2
+ require "relaton_ogc/hit"
3
+
4
module RelatonOgc
  # Collection of hits found in the OGC bibliography for a reference.
  #
  # The bibliography JSON is downloaded from ENDPOINT and cached in DATAFILE.
  # The ETag of the cached copy is stored in ETAGFILE so that later downloads
  # can be made conditional.
  class HitCollection < RelatonBib::HitCollection
    ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
      "NamingAuthority/master/incubation/bibliography/bibliography.json".freeze
    DATAFILE = File.expand_path "data/bibliography.json", __dir__
    ETAGFILE = File.expand_path "data/etag.txt", __dir__

    # Looks up +ref+ in the bibliography and fills the collection with the
    # matching hits, newest first.
    #
    # @param ref [String] document reference, with or without "OGC " prefix
    # @param year [String, nil]
    # @param _opts [Hash] accepted for interface compatibility; not used here
    def initialize(ref, year = nil, _opts = {})
      @text = ref
      @year = year
      @fetched = false
      # Entries without a date get Date.new, so they sort first here and end
      # up last once the list is reversed.
      hits = from_json(ref).sort_by do |hit|
        hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
      end
      concat hits.reverse
    end

    private

    #
    # Select bibliography entries whose identifier contains the reference.
    # Entries of type "CC" are skipped.
    #
    # @param docid [String]
    # @return [Array<RelatonOgc::Hit>]
    def from_json(docid, **_opts)
      ref = docid.sub(/^OGC\s/, "")
      matched = data.each_value.select do |doc|
        # to_s keeps an entry without an identifier from raising
        doc["type"] != "CC" && doc["identifier"].to_s.include?(ref)
      end
      matched.map { |doc| Hit.new(doc, self) }
    end

    #
    # Bibliography data. A download is attempted when there is no cached file
    # or the cached file was last changed before today.
    #
    # @return [Hash]
    def data
      ctime = File.ctime DATAFILE if File.exist? DATAFILE
      fetch_data if !ctime || ctime.to_date < Date.today
      @data ||= JSON.parse File.read(DATAFILE, encoding: "UTF-8")
    end

    #
    # Fetch data from the server and save it to file.
    #
    def fetch_data
      headers = {}
      # Make the request conditional only when a cached copy exists to fall
      # back on; otherwise a 304 answer would leave us without any data.
      headers["If-None-Match"] = etag if etag && File.exist?(DATAFILE)
      resp = Faraday.new(ENDPOINT, headers: headers).get
      # return if there aren't any changes since last fetching
      return unless resp.status == 200

      @data = JSON.parse resp.body
      dir = File.dirname DATAFILE
      Dir.mkdir dir unless Dir.exist? dir
      File.write DATAFILE, @data.to_json, encoding: "UTF-8"
      # Store the ETag only after the data is safely on disk, so the saved
      # tag never describes data that was not written.
      self.etag = resp[:etag] if resp[:etag]
    end

    #
    # Read ETag from file
    #
    # @return [String, NilClass]
    def etag
      @etag ||= if File.exist? ETAGFILE
                  File.read ETAGFILE, encoding: "UTF-8"
                end
    end

    #
    # Save ETag to file and refresh the memoized value
    #
    # @param e_tag [String]
    def etag=(e_tag)
      File.write ETAGFILE, e_tag, encoding: "UTF-8"
      @etag = e_tag
    end
  end
end
@@ -0,0 +1,11 @@
1
module RelatonOgc
  # Bibliographic item for OGC documents.
  class OgcBibliographicItem < RelatonIsoBib::IsoBibliographicItem
    # Document type identifiers listed for OGC items.
    # NOTE(review): presumably consulted by the parent class when it checks
    # the item's type - confirm against RelatonIsoBib::IsoBibliographicItem.
    TYPES = [
      "standard",
      "standard-with-suite",
      "abstract-specification",
      "best-practice",
      "candidate-standard",
      "conformance-class",
      "change-request",
      "community-standard",
      "discussion-paper",
      "draft-discussion-paper",
      "interoperability-program-report",
      "implementation-standard",
      "public-engineering-report",
    ].freeze
  end
end
@@ -0,0 +1,77 @@
1
module RelatonOgc
  # Entry point for looking up OGC documents.
  class OgcBibliography
    class << self
      # Searches the OGC bibliography for a reference.
      #
      # @param text [String] document reference
      # @param year [String, nil]
      # @param opts [Hash] passed on to HitCollection
      # @return [RelatonOgc::HitCollection]
      # @raise [RelatonBib::RequestError] when the connection fails
      def search(text, year = nil, opts = {})
        HitCollection.new text, year, opts
      rescue Faraday::ConnectionFailed
        # Faraday::Error::ConnectionFailed was only a deprecated alias and is
        # gone in Faraday 1.0; Faraday::ConnectionFailed is the real class.
        raise RelatonBib::RequestError,
              "Could not access #{HitCollection::ENDPOINT}"
      end

      # Finds the newest document matching the code (and the year, if given).
      # Writes a warning to stderr when nothing matches.
      #
      # @param code [String] the OGC document code to look up
      # @param year [String, nil] the year the document was published
      # @param opts [Hash] options, passed on to the search
      #
      # @return [RelatonOgc::OgcBibliographicItem, nil] the matching item, or
      #   nil when no match was found
      def get(code, year = nil, opts = {})
        result = bib_search_filter(code, year, opts)
        return unless result

        ret = bib_results_filter(result, year)
        return ret[:ret] if ret[:ret]

        fetch_ref_err(code, year, ret[:years])
      end

      private

      # Announces the lookup on stderr and runs the search.
      #
      # @param code [String]
      # @param year [String, nil]
      # @param opts [Hash]
      # @return [RelatonOgc::HitCollection]
      def bib_search_filter(code, year, opts)
        warn "fetching #{code}..."
        search(code, year, opts)
      end

      # Walks through the hits in order and returns the first item that
      # matches the year. When no year is given the first item is returned.
      # If nothing matches, returns the publication years that were seen, for
      # error reporting.
      #
      # @param result [Enumerable] hits responding to +fetch+
      # @param year [String, nil]
      #
      # @return [Hash] +{ ret: item }+ on success, +{ years: [...] }+ otherwise
      def bib_results_filter(result, year)
        missed_years = []
        result.each do |r|
          item = r.fetch
          return { ret: item } unless year

          item.date.select { |d| d.type == "published" }.each do |d|
            return { ret: item } if year.to_i == d.on.year

            missed_years << d.on.year
          end
        end
        { years: missed_years }
      end

      # Reports a failed lookup on stderr.
      #
      # @param code [String]
      # @param year [String, nil]
      # @param missed_years [Array<Integer>] years of the documents that were
      #   found but did not match
      # @return [nil]
      def fetch_ref_err(code, year, missed_years)
        id = year ? "#{code} year #{year}" : code
        warn "WARNING: no match found online for #{id}. "\
          "The code must be exactly like it is on the standards website."
        unless missed_years.empty?
          # uniq: several hits may share the same publication year
          warn "(There was no match for #{year}, though there were matches "\
            "found for #{missed_years.uniq.join(', ')}.)"
        end
        nil
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require "relaton/processor"
2
+
3
module RelatonOgc
  # Relaton processor for the OGC flavour.
  class Processor < Relaton::Processor
    # Sets the identifying attributes of this processor.
    def initialize
      @idtype = "OGC"
      @defaultprefix = /^OGC\s/
      @prefix = "OGC"
      @short = :relaton_ogc
    end

    # Looks a document up through OgcBibliography.
    #
    # @param code [String]
    # @param date [String, NilClass] year
    # @param opts [Hash]
    # @return [RelatonOgc::OgcBibliographicItem]
    def get(code, date = nil, opts = {})
      bibliography = ::RelatonOgc::OgcBibliography
      bibliography.get(code, date, opts)
    end

    # Builds an item from its XML form.
    #
    # @param xml [String]
    # @return [RelatonOgc::OgcBibliographicItem]
    def from_xml(xml)
      parser = ::RelatonOgc::XMLParser
      parser.from_xml(xml)
    end

    # Builds an item from a hash.
    #
    # @param hash [Hash]
    # @return [RelatonOgc::OgcBibliographicItem]
    def hash_to_bib(hash)
      ::RelatonOgc::OgcBibliographicItem.new(
        ::RelatonOgc::HashConverter.hash_to_bib(hash),
      )
    end
  end
end
@@ -0,0 +1,135 @@
1
module RelatonOgc
  # Builds OgcBibliographicItem objects from entries of the OGC bibliography.
  module Scrapper
    # Document type codes used in the bibliography and their full names.
    TYPES = {
      "AS" => "Abstract Specification",
      "BP" => "Best Practice",
      "CAN" => "Candidate Standard",
      "CC" => "Conformance Class",
      "CR" => "Change Request",
      "CS" => "Community Standard",
      "DP" => "Discussion Paper",
      "DP-Draft" => "Draft Discussion Paper",
      "IPR" => "Interoperability Program Report - Engineering Specification",
      "IS" => "Implementation Standard",
      "ISC" => "Implementation Standard Corrigendum",
      "ISx" => "Extension Package Standard",
      "Notes" => "Notes",
      "ORM" => "OGC Reference Model",
      "PC" => "Profile Corrigendum",
      "PER" => "Public Engineering Report",
      "POL" => "Policy",
      "POL-NTS" => "Policy - Name Type Specification",
      "Primer" => "Primer",
      "Profile" => "Profile",
      "RFC" => "Request for Comment",
      "Retired" => "Retired document",
      "SAP" => "Standard Application Profile",
      "TS" => "Test Suite",
      "WhitePaper" => "Whitepaper",
    }.freeze

    class << self
      # Converts one bibliography entry into a bibliographic item.
      #
      # @param hit [Hash] bibliography entry with string keys ("title",
      #   "identifier", "URL", "type", "description", "creator", "publisher",
      #   "date")
      # @return [RelatonOgc::OgcBibliographicItem]
      def parse_page(hit)
        OgcBibliographicItem.new(
          title: fetch_title(hit["title"]),
          docid: fetch_docid(hit["identifier"]),
          link: fetch_link(hit["URL"]),
          type: fetch_type(hit["type"]),
          edition: fetch_edition(hit["identifier"]),
          abstract: fetch_abstract(hit["description"]),
          contributor: fetch_contributor(hit),
          language: ["en"],
          script: ["Latn"],
          date: fetch_date(hit["date"]),
        )
      end

      private

      # Returns the title twice: once typed "title-main" and once "main".
      #
      # @param title [String]
      # @return [Array<RelatonIsoBib::TypedTitleString>]
      def fetch_title(title)
        [
          RelatonIsoBib::TypedTitleString.new(
            type: "title-main", content: title, language: "en", script: "Latn",
            format: "text/plain"
          ),
          RelatonIsoBib::TypedTitleString.new(
            type: "main", content: title, language: "en", script: "Latn",
            format: "text/plain"
          ),
        ]
      end

      # @param identifier [String]
      # @return [Array<RelatonBib::DocumentIdentifier>]
      def fetch_docid(identifier)
        [RelatonBib::DocumentIdentifier.new(id: identifier, type: "OGC")]
      end

      # @param url [String]
      # @return [Array<RelatonBib::TypedUri>]
      def fetch_link(url)
        [RelatonBib::TypedUri.new(type: "obp", content: url)]
      end

      # Maps a type code to its name, lower-cased and with spaces replaced by
      # hyphens. A leading "D-" on the code is ignored.
      #
      # @param type [String, nil]
      # @return [String, nil] nil when the code is missing or not in TYPES
      def fetch_type(type)
        name = TYPES[type.to_s.sub(/^D-/, "")]
        name&.downcase&.gsub(" ", "-")
      end

      # Extracts the digits that follow a trailing "r" in the identifier
      # (e.g. "1" for "19-025r1").
      #
      # @param identifier [String]
      # @return [String, nil] nil when the identifier has no such suffix
      def fetch_edition(identifier)
        %r{(?<=r)(?<edition>\d+)$} =~ identifier
        edition
      end

      # @param description [String]
      # @return [Array<RelatonBib::FormattedString>]
      def fetch_abstract(description)
        [RelatonBib::FormattedString.new(content: description, language: "en",
                                         script: "Latn")]
      end

      # Builds one author per name in the comma-separated "creator" field and,
      # if present, a publisher from the "publisher" field.
      #
      # @param doc [Hash]
      # @return [Array<RelatonBib::ContributionInfo>] always an array, empty
      #   when the entry has neither creators nor a publisher
      def fetch_contributor(doc)
        contribs = doc["creator"].to_s.split(", ").map do |name|
          personn_contrib name
        end
        contribs << org_contrib(doc["publisher"]) if doc["publisher"]
        # Explicit return value: the modifier-if above evaluates to nil when
        # there is no publisher, which would otherwise drop the authors.
        contribs
      end

      # @param name [String]
      # @return [RelatonBib::ContributionInfo]
      def personn_contrib(name)
        fname = RelatonBib::FullName.new(
          completename: RelatonBib::LocalizedString.new(name),
        )
        entity = RelatonBib::Person.new(name: fname)
        RelatonBib::ContributionInfo.new(
          entity: entity, role: [type: "author"],
        )
      end

      # @param name [String]
      # @return [RelatonBib::ContributionInfo]
      def org_contrib(name)
        entity = RelatonBib::Organization.new(name: name)
        RelatonBib::ContributionInfo.new(
          entity: entity, role: [type: "publisher"],
        )
      end

      # @param date [String, nil]
      # @return [Array<RelatonBib::BibliographicDate>] empty when the entry
      #   has no date
      def fetch_date(date)
        # Bibliography entries may lack a date (HitCollection sorts such
        # entries explicitly), so do not build a date object from nil.
        return [] unless date

        [RelatonBib::BibliographicDate.new(type: "published", on: date)]
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module RelatonOgc
  # Version string of this library.
  VERSION = "0.1.0"
end
@@ -0,0 +1,33 @@
1
+ require "nokogiri"
2
+
3
module RelatonOgc
  # Parses Relaton XML into OGC bibliographic items.
  class XMLParser < RelatonIsoBib::XMLParser
    class << self
      # Override RelatonIsoBib::XMLParser.from_xml method.
      # Reads the root +bibitem+ or +bibdata+ element of the document.
      #
      # @param xml [String]
      # @return [RelatonOgc::OgcBibliographicItem]
      def from_xml(xml)
        root = Nokogiri::XML(xml).at("/bibitem|/bibdata")
        OgcBibliographicItem.new(item_data(root))
      end

      private

      # Builds the editorial group from the +editorialgroup+ child of +ext+.
      #
      # @TODO Organization is not recreated
      # @param ext [Nokogiri::XML::Element]
      # @return [EditorialGroup, nil] nil when there is no editorialgroup
      #   element
      def fetch_editorialgroup(ext)
        group = ext.at("./editorialgroup")
        return if !group

        committee_name = group.at("committee")&.text
        subcommittee = iso_subgroup(group.at("subcommittee"))
        workgroup = iso_subgroup(group.at("workgroup"))
        EditorialGroup.new(
          committee: committee_name,
          subcommittee: subcommittee,
          workgroup: workgroup,
        )
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ lib = File.expand_path("lib", __dir__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require "relaton_ogc/version"
4
+
5
# Gem specification for relaton-ogc.
Gem::Specification.new do |spec|
  spec.name          = "relaton-ogc"
  spec.version       = RelatonOgc::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  spec.summary       = "RelatonOgc: retrieve OGC Standards for bibliographic "\
    "use using the OgcBibliographicItem model"
  # The description previously described RelatonIso / ISO standards, a
  # leftover from the gem this spec was copied from.
  spec.description   = "RelatonOgc: retrieve OGC Standards for bibliographic "\
    "use using the OgcBibliographicItem model"
  spec.homepage      = "https://github.com/relaton/relaton-ogc"
  spec.license       = "BSD-2-Clause"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "debase"
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "ruby-debug-ide"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "vcr"
  spec.add_development_dependency "webmock"

  spec.add_dependency "faraday"
  spec.add_dependency "relaton-iso-bib", "~> 0.3.0"
end