relaton-ogc 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,20 @@
1
module RelatonOgc
  # Hash converter specialised for OGC bibliographic items.
  class HashConverter < RelatonIsoBib::HashConverter
    class << self
      private

      # Swaps the raw +:editorialgroup+ entry of +ret+ for an EditorialGroup
      # instance built from its committee, subcommittee, workgroup and
      # secretariat values. Leaves +ret+ untouched when the entry is missing.
      #
      # @param ret [Hash] item attributes; modified in place
      def editorialgroup_hash_to_bib(ret)
        group = ret[:editorialgroup]
        return unless group

        keys = %i[committee subcommittee workgroup secretariat]
        attrs = keys.map { |key| [key, group[key]] }.to_h
        ret[:editorialgroup] = EditorialGroup.new(**attrs)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module RelatonOgc
  # One search result taken from the OGC bibliography.
  class Hit < RelatonBib::Hit
    # The collection this hit belongs to.
    # NOTE(review): presumably assigned by RelatonBib::Hit#initialize — confirm.
    # @return [RelatonOgc::HitCollection]
    attr_reader :hit_collection

    # Converts the raw record into a bibliographic item. The conversion runs
    # once; subsequent calls return the memoized item.
    #
    # @return [RelatonOgc::OgcBibliographicItem]
    def fetch
      return @fetch if @fetch

      @fetch = Scrapper.parse_page(@hit)
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require "faraday"
2
+ require "relaton_ogc/hit"
3
+
4
module RelatonOgc
  # Hits found in the OGC bibliography for a document reference.
  #
  # The bibliography is a JSON document downloaded from ENDPOINT. A copy is
  # kept in DATAFILE and is re-requested (using the ETag stored in ETAGFILE)
  # whenever the cached copy is missing or was last changed before today.
  class HitCollection < RelatonBib::HitCollection
    ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
      "NamingAuthority/master/incubation/bibliography/bibliography.json".freeze
    DATAFILE = File.expand_path "data/bibliography.json", __dir__
    ETAGFILE = File.expand_path "data/etag.txt", __dir__

    # @param ref [String] document reference, with or without the "OGC " prefix
    # @param year [String, nil]
    # @param _opts [Hash] accepted for interface compatibility; not used here
    def initialize(ref, year = nil, _opts = {})
      @text = ref
      @year = year
      @fetched = false
      hits = from_json(ref).sort_by do |hit|
        # Records without a date get Date.new, i.e. they sort as the oldest.
        hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
      end
      # Newest documents first.
      concat hits.reverse
    end

    private

    #
    # Select the bibliography records whose identifier contains the reference.
    # Records of type "CC" are skipped.
    #
    # @param docid [String]
    # @return [Array<RelatonOgc::Hit>]
    def from_json(docid, **_opts)
      ref = docid.sub(/^OGC\s/, "")
      data.select do |_k, doc|
        # to_s: a record without an identifier simply does not match.
        doc["type"] != "CC" && doc["identifier"].to_s.include?(ref)
      end.map { |_k, h| Hit.new(h, self) }
    end

    #
    # Bibliography data, refreshed from the server when the cached file is
    # missing or was last changed before today.
    #
    # @return [Hash]
    def data
      ctime = File.ctime DATAFILE if File.exist? DATAFILE
      fetch_data if !ctime || ctime.to_date < Date.today
      @data ||= JSON.parse File.read(DATAFILE, encoding: "UTF-8")
    end

    #
    # Fetch data from the server and save it to the cache file.
    #
    def fetch_data
      headers = {}
      # Make the request conditional only when a cached copy exists to fall
      # back on. Otherwise a stored ETag would yield "304 Not Modified" and
      # leave nothing to read.
      tag = etag
      if tag && !tag.empty? && File.exist?(DATAFILE)
        headers["If-None-Match"] = tag
      end
      resp = Faraday.new(ENDPOINT, headers: headers).get
      # return if there aren't any changes since last fetching
      return unless resp.status == 200

      self.etag = resp[:etag]
      @data = JSON.parse resp.body
      File.write DATAFILE, @data.to_json, encoding: "UTF-8"
    end

    #
    # Read the ETag from file (memoized).
    #
    # @return [String, NilClass]
    def etag
      @etag ||= if File.exist? ETAGFILE
                  File.read ETAGFILE, encoding: "UTF-8"
                end
    end

    #
    # Save the ETag to file and refresh the memoized value.
    #
    # @param e_tag [String]
    def etag=(e_tag)
      @etag = e_tag
      File.write ETAGFILE, e_tag, encoding: "UTF-8"
    end
  end
end
@@ -0,0 +1,11 @@
1
module RelatonOgc
  # Bibliographic item for OGC documents.
  class OgcBibliographicItem < RelatonIsoBib::IsoBibliographicItem
    # Document type names defined for OGC items.
    TYPES = [
      "standard",
      "standard-with-suite",
      "abstract-specification",
      "best-practice",
      "candidate-standard",
      "conformance-class",
      "change-request",
      "community-standard",
      "discussion-paper",
      "draft-discussion-paper",
      "interoperability-program-report",
      "implementation-standard",
      "public-engineering-report",
    ].freeze
  end
end
@@ -0,0 +1,77 @@
1
module RelatonOgc
  # Entry point for searching the OGC bibliography and retrieving items.
  class OgcBibliography
    class << self
      # @param text [String] document reference
      # @param year [String, nil]
      # @param opts [Hash]
      # @return [RelatonOgc::HitCollection]
      # @raise [RelatonBib::RequestError] if the connection to the
      #   bibliography endpoint fails
      def search(text, year = nil, opts = {})
        HitCollection.new text, year, opts
      rescue Faraday::ConnectionFailed
        # Faraday::ConnectionFailed is the canonical class name;
        # Faraday::Error::ConnectionFailed was a deprecated alias.
        raise RelatonBib::RequestError,
              "Could not access #{HitCollection::ENDPOINT}"
      end

      # @param code [String] the OGC standard code to look up
      # @param year [String] the year the standard was published (optional)
      #
      # @param opts [Hash] options, passed through to the search
      #
      # @return [RelatonOgc::OgcBibliographicItem, nil] the first matching
      #   item, or nil (after printing a warning) when nothing matches
      def get(code, year = nil, opts = {})
        result = bib_search_filter(code, year, opts)
        return unless result

        ret = bib_results_filter(result, year)
        ret[:ret] || fetch_ref_err(code, year, ret[:years])
      end

      private

      # Announces the lookup on stderr and runs the search.
      #
      # @param code [String]
      # @param year [String, nil]
      # @param opts [Hash]
      # @return [RelatonOgc::HitCollection]
      def bib_search_filter(code, year, opts)
        warn "fetching #{code}..."
        search(code, year, opts)
      end

      # Walks the hits in order and returns the first item that matches the
      # year. Without a year the first hit is returned as is. If nothing
      # matches, returns the years that were seen instead, for error
      # reporting.
      #
      # @param result [Enumerable] hits responding to #fetch
      # @param year [String, nil]
      #
      # @return [Hash] +{ ret: item }+ on success, +{ years: [...] }+ otherwise
      def bib_results_filter(result, year)
        missed_years = []
        result.each do |r|
          item = r.fetch
          return { ret: item } unless year

          item.date.select { |d| d.type == "published" }.each do |d|
            return { ret: item } if year.to_i == d.on.year

            missed_years << d.on.year
          end
        end
        { years: missed_years }
      end

      # Prints a "no match" warning and returns nil.
      #
      # @param code [String]
      # @param year [String, nil]
      # @param missed_years [Array<Integer>] publication years that were
      #   found but did not match
      # @return [nil]
      def fetch_ref_err(code, year, missed_years)
        id = year ? "#{code} year #{year}" : code
        warn "WARNING: no match found online for #{id}. "\
          "The code must be exactly like it is on the standards website."
        unless missed_years.empty?
          warn "(There was no match for #{year}, though there were matches "\
            "found for #{missed_years.join(', ')}.)"
        end
        nil
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require "relaton/processor"
2
+
3
module RelatonOgc
  # Relaton processor for the OGC flavour.
  class Processor < Relaton::Processor
    # Sets the identifiers under which this processor is known: short name,
    # reference prefix, the pattern matching prefixed references and the
    # document identifier type.
    def initialize
      @idtype = "OGC"
      @prefix = "OGC"
      @defaultprefix = %r{^OGC\s}
      @short = :relaton_ogc
    end

    # Looks up a document in the OGC bibliography.
    #
    # @param code [String]
    # @param date [String, NilClass] year
    # @param opts [Hash]
    # @return [RelatonOgc::OgcBibliographicItem]
    def get(code, date = nil, opts = {})
      bibliography = ::RelatonOgc::OgcBibliography
      bibliography.get(code, date, opts)
    end

    # Builds an item from its XML serialisation.
    #
    # @param xml [String]
    # @return [RelatonOgc::OgcBibliographicItem]
    def from_xml(xml)
      ::RelatonOgc::XMLParser.from_xml(xml)
    end

    # Builds an item from a plain hash.
    #
    # @param hash [Hash]
    # @return [RelatonOgc::OgcBibliographicItem]
    def hash_to_bib(hash)
      ::RelatonOgc::OgcBibliographicItem.new(
        ::RelatonOgc::HashConverter.hash_to_bib(hash),
      )
    end
  end
end
@@ -0,0 +1,135 @@
1
module RelatonOgc
  # Builds OgcBibliographicItem objects from records of the OGC bibliography.
  module Scrapper
    # Document type codes used in the bibliography and their full names.
    TYPES = {
      "AS" => "Abstract Specification",
      "BP" => "Best Practice",
      "CAN" => "Candidate Standard",
      "CC" => "Conformance Class",
      "CR" => "Change Request",
      "CS" => "Community Standard",
      "DP" => "Discussion Paper",
      "DP-Draft" => "Draft Discussion Paper",
      "IPR" => "Interoperability Program Report - Engineering Specification",
      "IS" => "Implementation Standard",
      "ISC" => "Implementation Standard Corrigendum",
      "ISx" => "Extension Package Standard",
      "Notes" => "Notes",
      "ORM" => "OGC Reference Model",
      "PC" => "Profile Corrigendum",
      "PER" => "Public Engineering Report",
      "POL" => "Policy",
      "POL-NTS" => "Policy - Name Type Specification",
      "Primer" => "Primer",
      "Profile" => "Profile",
      "RFC" => "Request for Comment",
      "Retired" => "Retired document",
      "SAP" => "Standard Application Profile",
      "TS" => "Test Suite",
      "WhitePaper" => "Whitepaper",
    }.freeze

    class << self
      # @param hit [Hash] one record of the bibliography; the keys read are
      #   "title", "identifier", "URL", "type", "description", "creator",
      #   "publisher" and "date"
      # @return [RelatonOgc::OgcBibliographicItem]
      def parse_page(hit)
        OgcBibliographicItem.new(
          title: fetch_title(hit["title"]),
          docid: fetch_docid(hit["identifier"]),
          link: fetch_link(hit["URL"]),
          type: fetch_type(hit["type"]),
          edition: fetch_edition(hit["identifier"]),
          abstract: fetch_abstract(hit["description"]),
          contributor: fetch_contributor(hit),
          language: ["en"],
          script: ["Latn"],
          date: fetch_date(hit["date"]),
        )
      end

      private

      # The same text is emitted twice, typed "title-main" and "main".
      #
      # @param title [String]
      # @return [Array<RelatonIsoBib::TypedTitleString>]
      def fetch_title(title)
        %w[title-main main].map do |type|
          RelatonIsoBib::TypedTitleString.new(
            type: type, content: title, language: "en", script: "Latn",
            format: "text/plain"
          )
        end
      end

      # @param identifier [String]
      # @return [Array<RelatonBib::DocumentIdentifier>]
      def fetch_docid(identifier)
        [RelatonBib::DocumentIdentifier.new(id: identifier, type: "OGC")]
      end

      # @param url [String]
      # @return [Array<RelatonBib::TypedUri>]
      def fetch_link(url)
        [RelatonBib::TypedUri.new(type: "obp", content: url)]
      end

      # Maps a type code to its hyphenated lower-case name, ignoring a
      # leading "D-" on the code.
      #
      # @param type [String, nil]
      # @return [String, nil] nil when the code is missing or not in TYPES
      def fetch_type(type)
        TYPES[type.to_s.sub(/^D-/, "")]&.downcase&.gsub(" ", "-")
      end

      # Takes the digits that follow an "r" at the very end of the
      # identifier, e.g. "4" for "06-021r4".
      #
      # @param identifier [String]
      # @return [String, nil] nil when the identifier has no such suffix
      def fetch_edition(identifier)
        identifier.to_s[/(?<=r)\d+$/]
      end

      # @param description [String]
      # @return [Array<RelatonBib::FormattedString>]
      def fetch_abstract(description)
        [RelatonBib::FormattedString.new(content: description, language: "en",
                                         script: "Latn")]
      end

      # One author per comma-separated name in "creator", followed by the
      # publisher organisation when the record has one.
      #
      # @param doc [Hash]
      # @return [Array<RelatonBib::ContributionInfo>]
      def fetch_contributor(doc)
        contribs = doc["creator"].to_s.split(", ").map do |name|
          personn_contrib name
        end
        contribs << org_contrib(doc["publisher"]) if doc["publisher"]
        # Return the list explicitly: the modifier-if above evaluates to nil
        # when there is no publisher.
        contribs
      end

      # @param name [String]
      # @return [RelatonBib::ContributionInfo]
      def personn_contrib(name)
        fname = RelatonBib::FullName.new(
          completename: RelatonBib::LocalizedString.new(name),
        )
        entity = RelatonBib::Person.new(name: fname)
        RelatonBib::ContributionInfo.new(
          entity: entity, role: [type: "author"],
        )
      end

      # @param name [String]
      # @return [RelatonBib::ContributionInfo]
      def org_contrib(name)
        entity = RelatonBib::Organization.new(name: name)
        RelatonBib::ContributionInfo.new(
          entity: entity, role: [type: "publisher"],
        )
      end

      # @param date [String, nil]
      # @return [Array<RelatonBib::BibliographicDate>] empty when the record
      #   has no date (HitCollection also treats the date as optional)
      def fetch_date(date)
        return [] unless date

        [RelatonBib::BibliographicDate.new(type: "published", on: date)]
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module RelatonOgc
  # Gem version. Frozen so that the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,33 @@
1
+ require "nokogiri"
2
+
3
module RelatonOgc
  # XML parser producing OGC bibliographic items.
  class XMLParser < RelatonIsoBib::XMLParser
    class << self
      # Overrides RelatonIsoBib::XMLParser.from_xml so that the result is an
      # OgcBibliographicItem. The root element is looked up as either
      # +bibitem+ or +bibdata+.
      #
      # @param xml [String]
      # @return [RelatonOgc::OgcBibliographicItem]
      def from_xml(xml)
        root = Nokogiri::XML(xml).at("/bibitem|/bibdata")
        OgcBibliographicItem.new(item_data(root))
      end

      private

      # Builds the editorial group from the +editorialgroup+ child of +ext+.
      #
      # TODO: the organization is not recreated.
      #
      # @param ext [Nokogiri::XML::Element]
      # @return [RelatonIsoBib::EditorialGroup, nil] nil when +ext+ has no
      #   +editorialgroup+ child
      def fetch_editorialgroup(ext)
        group = ext.at("./editorialgroup")
        return if group.nil?

        committee_name = group.at("committee")&.text
        subcommittee = iso_subgroup(group.at("subcommittee"))
        workgroup = iso_subgroup(group.at("workgroup"))
        EditorialGroup.new(
          committee: committee_name,
          subcommittee: subcommittee,
          workgroup: workgroup,
        )
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
# Make lib/ loadable so the version constant can be read without installing
# the gem.
lib = File.expand_path("lib", __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "relaton_ogc/version"

Gem::Specification.new do |spec|
  spec.name          = "relaton-ogc"
  spec.version       = RelatonOgc::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  spec.summary       = "RelatonOgc: retrieve OGC Standards for bibliographic "\
                       "use using the OgcBibliographicItem model"
  # Describes this gem (OGC), not the relaton-iso gem it was derived from.
  spec.description   = "RelatonOgc: retrieve OGC Standards for bibliographic "\
                       "use using the OgcBibliographicItem model"
  spec.homepage      = "https://github.com/relaton/relaton-ogc"
  spec.license       = "BSD-2-Clause"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added
  # into git; test, spec and features directories are left out.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "debase"
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "ruby-debug-ide"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "vcr"
  spec.add_development_dependency "webmock"

  spec.add_dependency "faraday"
  spec.add_dependency "relaton-iso-bib", "~> 0.3.0"
end