relaton-ogc 1.9.1 → 1.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7fa7cf7cb4a2e7b21cb49a4fbb482f08119776626d18b6ab186080905783d516
4
- data.tar.gz: d4bb20a2c3f35b1c25ac99882c5a3f7a405a420cc5d3dbcb2f6359b0f2f6166f
3
+ metadata.gz: d9ddd36656e221ad7ceb82adf637197b15648451b42fbd78069bc02078082c05
4
+ data.tar.gz: 9179d1d896e37f26841c392701c500990eafbfdb1060a4120bb85189014d788e
5
5
  SHA512:
6
- metadata.gz: d5679900661ae943e2e929500314cfbb98fe416ebe8c809fccd0daa6f5d2c6232d9c85844d406372ab41af3c413de914944fd7b0433e24c7582a80942fe7d710
7
- data.tar.gz: c187307b81cc069bc652f38e494ed01de8dfe96de9c00b7fc4c719f73dcff62335cc3d4167a4d32b1b402d31ac9df7e76fe5a77e6e2295cf4c6f1937c6b1f4a5
6
+ metadata.gz: 0ab0f2b5ed50fd8f218c92ae1a8589df818f92bb2480fffc450fbe930c16185cc9855d683dc4aafb5d34ad4f808ffd1cc44ba11b19adc6550d62dc029e7c0b11
7
+ data.tar.gz: c62733901e8f8790eaaccf90345503fd187bba8e8a08655400fdd77aa508df640ec7a4f11ef1146e41b0e456c538954769cbb4ef2d6eacba3b6b2cac73a70e01
data/README.adoc CHANGED
@@ -113,6 +113,25 @@ RelatonOgc::OgcBibliographicItem.from_hash hash
113
113
  ...
114
114
  ----
115
115
 
116
+ === Fetch data
117
+
118
+ This gem uses the https://raw.githubusercontent.com/opengeospatial/NamingAuthority/master/incubation/bibliography/bibliography.json dataset as its data source.
119
+
120
+ The method `RelatonOgc::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
121
+ Arguments:
122
+
123
+ - `output` - folder to save documents (default './data').
124
+ - `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml` (default `yaml`).
125
+
126
+ [source,ruby]
127
+ ----
128
+ RelatonOgc::DataFetcher.fetch
129
+ Started at: 2021-09-14 11:21:46 +0200
130
+ [relaton-ogc] WARNING Duplicated documents: 15-113r5, 08-094r1, 10-025r1, 12-128r14, 16-079, 16-007r3, 13-026r8, 12-128r12, 15-078r6, 12-176r7, 09-102r3, 14-095, 14-115, 07-147r2, 12-000, 12-006, 09-025r1, 07-036, 07-110r4, 03-105r1, 06-042, 07-165r1, 12-066, 06-104r4, 11-122r1, 09-000, 04-094, 07-006r1, 06-035r1, 03-006r3, 05-134, 04-021r3, 02-058, 01-009
131
+ Stopped at: 2021-09-14 11:21:48 +0200
132
+ => nil
133
+ ----
134
+
116
135
  == Development
117
136
 
118
137
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -0,0 +1,96 @@
1
+ module RelatonOgc
2
+ class DataFetcher
3
+ module Utils
4
+ ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
5
+ "NamingAuthority/master/incubation/bibliography/"\
6
+ "bibliography.json".freeze
7
+
8
+ def get_data # rubocop:disable Metrics/AbcSize
9
+ h = {}
10
+ h["If-None-Match"] = etag if etag
11
+ resp = Faraday.new(ENDPOINT, headers: h).get
12
+ case resp.status
13
+ when 200
14
+ json = JSON.parse(resp.body)
15
+ block_given? ? yield(resp[:etag], json) : json
16
+ when 304 then [] # there aren't any changes since last fetching
17
+ else raise RelatonBib::RequestError, "Could not access #{ENDPOINT}"
18
+ end
19
+ end
20
+
21
+ #
22
+ # Read ETag from file
23
+ #
24
+ # @return [String, NilClass]
25
+ def etag
26
+ @etag ||= if File.exist? @etagfile
27
+ File.read @etagfile, encoding: "UTF-8"
28
+ end
29
+ end
30
+
31
+ #
32
+ # Save ETag to file
33
+ #
34
+ # @param e_tag [String]
35
+ def etag=(e_tag)
36
+ File.write @etagfile, e_tag, encoding: "UTF-8"
37
+ end
38
+ end
39
+
40
+ include Utils
41
+
42
+ #
43
+ # Create DataFetcher instance
44
+ #
45
+ # @param [String] output directory to save the documents
46
+ # @param [String] format output format "yaml" or "xml"
47
+ #
48
+ def initialize(output, format)
49
+ @output = output
50
+ @etagfile = File.join output, "etag.txt"
51
+ @format = format
52
+ @docids = []
53
+ @dupids = []
54
+ end
55
+
56
+ def self.fetch(output: "data", format: "yaml")
57
+ t1 = Time.now
58
+ puts "Started at: #{t1}"
59
+ FileUtils.mkdir_p output unless Dir.exist? output
60
+ new(output, format).fetch
61
+ t2 = Time.now
62
+ puts "Stopped at: #{t2}"
63
+ puts "Done in: #{(t2 - t1).round} sec."
64
+ end
65
+
66
+ def fetch # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
67
+ get_data do |etag, json|
68
+ no_errors = true
69
+ json.each do |_, hit|
70
+ bib = Scrapper.parse_page hit
71
+ write_document bib
72
+ rescue StandardError => e
73
+ no_errors = false
74
+ warn "Fetching document: #{hit['identifier']}"
75
+ warn "#{e.class} #{e.message}"
76
+ warn e.backtrace
77
+ end
78
+ warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.uniq.join(', ')}" if @dupids.any?
79
+ self.etag = etag if no_errors
80
+ end
81
+ end
82
+
83
+ def write_document(bib) # rubocop:disable Metrics/AbcSize
84
+ if @docids.include?(bib.docidentifier[0].id)
85
+ @dupids << bib.docidentifier[0].id
86
+ return
87
+ end
88
+
89
+ @docids << bib.docidentifier[0].id
90
+ name = bib.docidentifier[0].id.upcase.gsub(/[\s:.]/, "_")
91
+ file = "#{@output}/#{name}.#{@format}"
92
+ content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
93
+ File.write file, content, encoding: "UTF-8"
94
+ end
95
+ end
96
+ end
@@ -4,23 +4,25 @@ require "fileutils"
4
4
 
5
5
  module RelatonOgc
6
6
  class HitCollection < RelatonBib::HitCollection
7
- ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
8
- "NamingAuthority/master/incubation/bibliography/bibliography.json".freeze
7
+ include DataFetcher::Utils
8
+
9
+ # ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
10
+ # "NamingAuthority/master/incubation/bibliography/"\
11
+ # "bibliography.json".freeze
9
12
  DATADIR = File.expand_path ".relaton/ogc/", Dir.home
10
13
  DATAFILE = File.expand_path "bibliography.json", DATADIR
11
- ETAGFILE = File.expand_path "etag.txt", DATADIR
14
+ # ETAGFILE = File.expand_path "etag.txt", DATADIR
12
15
 
13
16
  # @param ref [String]
14
17
  # @param year [String]
15
18
  # @param opts [Hash]
16
19
  def initialize(ref, year = nil)
17
20
  super
21
+ @etagfile = File.expand_path "etag.txt", DATADIR
18
22
  @array = from_json(ref).sort_by do |hit|
19
- begin
20
- hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
21
- rescue ArgumentError
22
- Date.parse "0000-01-01"
23
- end
23
+ hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
24
+ rescue ArgumentError
25
+ Date.parse "0000-01-01"
24
26
  end.reverse
25
27
  end
26
28
 
@@ -52,38 +54,13 @@ module RelatonOgc
52
54
  #
53
55
  # fetch data from server and save it to file.
54
56
  #
55
- def fetch_data # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
56
- h = {}
57
- h["If-None-Match"] = etag if etag
58
- resp = Faraday.new(ENDPOINT, headers: h).get
59
- # return if there aren't any changes since last fetching
60
- return if resp.status == 304
61
- unless resp.status == 200
62
- raise RelatonBib::RequestError, "Could not access #{ENDPOINT}"
63
- end
57
+ def fetch_data
58
+ json = get_data
59
+ return unless json
64
60
 
65
61
  FileUtils.mkdir_p DATADIR unless Dir.exist? DATADIR
66
- self.etag = resp[:etag]
67
- @data = JSON.parse resp.body
62
+ @data = json
68
63
  File.write DATAFILE, @data.to_json, encoding: "UTF-8"
69
64
  end
70
-
71
- #
72
- # Read ETag form file
73
- #
74
- # @return [String, NilClass]
75
- def etag
76
- @etag ||= if File.exist? ETAGFILE
77
- File.read ETAGFILE, encoding: "UTF-8"
78
- end
79
- end
80
-
81
- #
82
- # Save ETag to file
83
- #
84
- # @param tag [String]
85
- def etag=(e_tag)
86
- File.write ETAGFILE, e_tag, encoding: "UTF-8"
87
- end
88
65
  end
89
66
  end
@@ -2,11 +2,12 @@ require "relaton/processor"
2
2
 
3
3
  module RelatonOgc
4
4
  class Processor < Relaton::Processor
5
- def initialize
5
+ def initialize # rubocop:disable Lint/MissingSuper
6
6
  @short = :relaton_ogc
7
7
  @prefix = "OGC"
8
8
  @defaultprefix = %r{^OGC\s}
9
9
  @idtype = "OGC"
10
+ @datasets = %w[ogc-naming-authority]
10
11
  end
11
12
 
12
13
  # @param code [String]
@@ -17,6 +18,18 @@ module RelatonOgc
17
18
  ::RelatonOgc::OgcBibliography.get(code, date, opts)
18
19
  end
19
20
 
21
+ #
22
+ # Fetch all the documents from a source
23
+ #
24
+ # @param [String] _source source name
25
+ # @param [Hash] opts
26
+ # @option opts [String] :output directory to output documents
27
+ # @option opts [String] :format
28
+ #
29
+ def fetch_data(_source, opts)
30
+ DataFetcher.fetch(**opts)
31
+ end
32
+
20
33
  # @param xml [String]
21
34
  # @return [RelatonOgc::OgcBibliographicItem]
22
35
  def from_xml(xml)
@@ -13,7 +13,7 @@ module RelatonOgc
13
13
  "IPR" => { type: "engineering-report" },
14
14
  "IS" => { type: "standard", subtype: "implementation" },
15
15
  "ISC" => { type: "standard", subtype: "implementation" },
16
- "ISx" => { type: "standard", subtype: "extesion" },
16
+ "ISx" => { type: "standard", subtype: "extension" },
17
17
  "Notes" => { type: "other" },
18
18
  "ORM" => { type: "reference-model" },
19
19
  "PC" => { type: "standard", subtype: "profile" },
@@ -88,7 +88,7 @@ module RelatonOgc
88
88
  # @param stage [String]
89
89
  # @return [RelatonBib::DocumentStatus, NilClass]
90
90
  def fetch_status(stage)
91
- stage && RelatonBib::DocunentStatus.new(stage: stage)
91
+ stage && RelatonBib::DocumentStatus.new(stage: stage)
92
92
  end
93
93
 
94
94
  # @param identifier [String]
@@ -138,6 +138,8 @@ module RelatonOgc
138
138
  # @param date [String]
139
139
  # @return [Array<RelatonBib::BibliographicDate>]
140
140
  def fetch_date(date)
141
+ return [] unless date
142
+
141
143
  [RelatonBib::BibliographicDate.new(type: "published", on: date)]
142
144
  end
143
145
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonOgc
2
- VERSION = "1.9.1".freeze
2
+ VERSION = "1.9.2".freeze
3
3
  end
data/lib/relaton_ogc.rb CHANGED
@@ -2,6 +2,7 @@ require "relaton_iso_bib"
2
2
  require "relaton_ogc/version"
3
3
  require "relaton_ogc/ogc_bibliographic_item"
4
4
  require "relaton_ogc/ogc_bibliography"
5
+ require "relaton_ogc/data_fetcher"
5
6
  require "relaton_ogc/hit_collection"
6
7
  require "relaton_ogc/scrapper"
7
8
  require "relaton_ogc/xml_parser"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-ogc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.1
4
+ version: 1.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-09-07 00:00:00.000000000 Z
11
+ date: 2021-09-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -175,6 +175,7 @@ files:
175
175
  - grammars/ogc.rng
176
176
  - grammars/reqt.rng
177
177
  - lib/relaton_ogc.rb
178
+ - lib/relaton_ogc/data_fetcher.rb
178
179
  - lib/relaton_ogc/editorial_group.rb
179
180
  - lib/relaton_ogc/hash_converter.rb
180
181
  - lib/relaton_ogc/hit.rb