relaton-ogc 1.9.1 → 1.9.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7fa7cf7cb4a2e7b21cb49a4fbb482f08119776626d18b6ab186080905783d516
4
- data.tar.gz: d4bb20a2c3f35b1c25ac99882c5a3f7a405a420cc5d3dbcb2f6359b0f2f6166f
3
+ metadata.gz: d9ddd36656e221ad7ceb82adf637197b15648451b42fbd78069bc02078082c05
4
+ data.tar.gz: 9179d1d896e37f26841c392701c500990eafbfdb1060a4120bb85189014d788e
5
5
  SHA512:
6
- metadata.gz: d5679900661ae943e2e929500314cfbb98fe416ebe8c809fccd0daa6f5d2c6232d9c85844d406372ab41af3c413de914944fd7b0433e24c7582a80942fe7d710
7
- data.tar.gz: c187307b81cc069bc652f38e494ed01de8dfe96de9c00b7fc4c719f73dcff62335cc3d4167a4d32b1b402d31ac9df7e76fe5a77e6e2295cf4c6f1937c6b1f4a5
6
+ metadata.gz: 0ab0f2b5ed50fd8f218c92ae1a8589df818f92bb2480fffc450fbe930c16185cc9855d683dc4aafb5d34ad4f808ffd1cc44ba11b19adc6550d62dc029e7c0b11
7
+ data.tar.gz: c62733901e8f8790eaaccf90345503fd187bba8e8a08655400fdd77aa508df640ec7a4f11ef1146e41b0e456c538954769cbb4ef2d6eacba3b6b2cac73a70e01
data/README.adoc CHANGED
@@ -113,6 +113,25 @@ RelatonOgc::OgcBibliographicItem.from_hash hash
113
113
  ...
114
114
  ----
115
115
 
116
+ === Fetch data
117
+
118
+ This gem uses the https://raw.githubusercontent.com/opengeospatial/NamingAuthority/master/incubation/bibliography/bibliography.json dataset as a data source.
119
+
120
+ The method `RelatonOgc::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
121
+ Arguments:
122
+
123
+ - `output` - folder to save documents (default './data').
124
+ - `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml` (default `yaml`).
125
+
126
+ [source,ruby]
127
+ ----
128
+ RelatonOgc::DataFetcher.fetch
129
+ Started at: 2021-09-14 11:21:46 +0200
130
+ [relaton-ogc] WARNING Duplicated documents: 15-113r5, 08-094r1, 10-025r1, 12-128r14, 16-079, 16-007r3, 13-026r8, 12-128r12, 15-078r6, 12-176r7, 09-102r3, 14-095, 14-115, 07-147r2, 12-000, 12-006, 09-025r1, 07-036, 07-110r4, 03-105r1, 06-042, 07-165r1, 12-066, 06-104r4, 11-122r1, 09-000, 04-094, 07-006r1, 06-035r1, 03-006r3, 05-134, 04-021r3, 02-058, 01-009
131
+ Stopped at: 2021-09-14 11:21:48 +0200
132
+ => nil
133
+ ----
134
+
116
135
  == Development
117
136
 
118
137
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -0,0 +1,96 @@
1
+ module RelatonOgc
2
+ class DataFetcher
3
+ module Utils
4
+ ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
5
+ "NamingAuthority/master/incubation/bibliography/"\
6
+ "bibliography.json".freeze
7
+
8
+ def get_data # rubocop:disable Metrics/AbcSize
9
+ h = {}
10
+ h["If-None-Match"] = etag if etag
11
+ resp = Faraday.new(ENDPOINT, headers: h).get
12
+ case resp.status
13
+ when 200
14
+ json = JSON.parse(resp.body)
15
+ block_given? ? yield(resp[:etag], json) : json
16
+ when 304 then [] # there aren't any changes since last fetching
17
+ else raise RelatonBib::RequestError, "Could not access #{ENDPOINT}"
18
+ end
19
+ end
20
+
21
+ #
22
+ # Read ETag from file
23
+ #
24
+ # @return [String, NilClass]
25
+ def etag
26
+ @etag ||= if File.exist? @etagfile
27
+ File.read @etagfile, encoding: "UTF-8"
28
+ end
29
+ end
30
+
31
+ #
32
+ # Save ETag to file
33
+ #
34
+ # @param e_tag [String]
35
+ def etag=(e_tag)
36
+ File.write @etagfile, e_tag, encoding: "UTF-8"
37
+ end
38
+ end
39
+
40
+ include Utils
41
+
42
+ #
43
+ # Create DataFetcher instance
44
+ #
45
+ # @param [String] output directory to save the documents
46
+ # @param [String] format output format "yaml" or "xml"
47
+ #
48
+ def initialize(output, format)
49
+ @output = output
50
+ @etagfile = File.join output, "etag.txt"
51
+ @format = format
52
+ @docids = []
53
+ @dupids = []
54
+ end
55
+
56
+ def self.fetch(output: "data", format: "yaml")
57
+ t1 = Time.now
58
+ puts "Started at: #{t1}"
59
+ FileUtils.mkdir_p output unless Dir.exist? output
60
+ new(output, format).fetch
61
+ t2 = Time.now
62
+ puts "Stopped at: #{t2}"
63
+ puts "Done in: #{(t2 - t1).round} sec."
64
+ end
65
+
66
+ def fetch # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
67
+ get_data do |etag, json|
68
+ no_errors = true
69
+ json.each do |_, hit|
70
+ bib = Scrapper.parse_page hit
71
+ write_document bib
72
+ rescue StandardError => e
73
+ no_errors = false
74
+ warn "Fetching document: #{hit['identifier']}"
75
+ warn "#{e.class} #{e.message}"
76
+ warn e.backtrace
77
+ end
78
+ warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.uniq.join(', ')}" if @dupids.any?
79
+ self.etag = etag if no_errors
80
+ end
81
+ end
82
+
83
+ def write_document(bib) # rubocop:disable Metrics/AbcSize
84
+ if @docids.include?(bib.docidentifier[0].id)
85
+ @dupids << bib.docidentifier[0].id
86
+ return
87
+ end
88
+
89
+ @docids << bib.docidentifier[0].id
90
+ name = bib.docidentifier[0].id.upcase.gsub(/[\s:.]/, "_")
91
+ file = "#{@output}/#{name}.#{@format}"
92
+ content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
93
+ File.write file, content, encoding: "UTF-8"
94
+ end
95
+ end
96
+ end
@@ -4,23 +4,25 @@ require "fileutils"
4
4
 
5
5
  module RelatonOgc
6
6
  class HitCollection < RelatonBib::HitCollection
7
- ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
8
- "NamingAuthority/master/incubation/bibliography/bibliography.json".freeze
7
+ include DataFetcher::Utils
8
+
9
+ # ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
10
+ # "NamingAuthority/master/incubation/bibliography/"\
11
+ # "bibliography.json".freeze
9
12
  DATADIR = File.expand_path ".relaton/ogc/", Dir.home
10
13
  DATAFILE = File.expand_path "bibliography.json", DATADIR
11
- ETAGFILE = File.expand_path "etag.txt", DATADIR
14
+ # ETAGFILE = File.expand_path "etag.txt", DATADIR
12
15
 
13
16
  # @param ref [String]
14
17
  # @param year [String]
15
18
  # @param opts [Hash]
16
19
  def initialize(ref, year = nil)
17
20
  super
21
+ @etagfile = File.expand_path "etag.txt", DATADIR
18
22
  @array = from_json(ref).sort_by do |hit|
19
- begin
20
- hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
21
- rescue ArgumentError
22
- Date.parse "0000-01-01"
23
- end
23
+ hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
24
+ rescue ArgumentError
25
+ Date.parse "0000-01-01"
24
26
  end.reverse
25
27
  end
26
28
 
@@ -52,38 +54,13 @@ module RelatonOgc
52
54
  #
53
55
  # fetch data from the server and save it to file.
54
56
  #
55
- def fetch_data # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
56
- h = {}
57
- h["If-None-Match"] = etag if etag
58
- resp = Faraday.new(ENDPOINT, headers: h).get
59
- # return if there aren't any changes since last fetching
60
- return if resp.status == 304
61
- unless resp.status == 200
62
- raise RelatonBib::RequestError, "Could not access #{ENDPOINT}"
63
- end
57
+ def fetch_data
58
+ json = get_data
59
+ return unless json
64
60
 
65
61
  FileUtils.mkdir_p DATADIR unless Dir.exist? DATADIR
66
- self.etag = resp[:etag]
67
- @data = JSON.parse resp.body
62
+ @data = json
68
63
  File.write DATAFILE, @data.to_json, encoding: "UTF-8"
69
64
  end
70
-
71
- #
72
- # Read ETag form file
73
- #
74
- # @return [String, NilClass]
75
- def etag
76
- @etag ||= if File.exist? ETAGFILE
77
- File.read ETAGFILE, encoding: "UTF-8"
78
- end
79
- end
80
-
81
- #
82
- # Save ETag to file
83
- #
84
- # @param tag [String]
85
- def etag=(e_tag)
86
- File.write ETAGFILE, e_tag, encoding: "UTF-8"
87
- end
88
65
  end
89
66
  end
@@ -2,11 +2,12 @@ require "relaton/processor"
2
2
 
3
3
  module RelatonOgc
4
4
  class Processor < Relaton::Processor
5
- def initialize
5
+ def initialize # rubocop:disable Lint/MissingSuper
6
6
  @short = :relaton_ogc
7
7
  @prefix = "OGC"
8
8
  @defaultprefix = %r{^OGC\s}
9
9
  @idtype = "OGC"
10
+ @datasets = %w[ogc-naming-authority]
10
11
  end
11
12
 
12
13
  # @param code [String]
@@ -17,6 +18,18 @@ module RelatonOgc
17
18
  ::RelatonOgc::OgcBibliography.get(code, date, opts)
18
19
  end
19
20
 
21
+ #
22
+ # Fetch all the documents from a source
23
+ #
24
+ # @param [String] _source source name
25
+ # @param [Hash] opts
26
+ # @option opts [String] :output directory to output documents
27
+ # @option opts [String] :format
28
+ #
29
+ def fetch_data(_source, opts)
30
+ DataFetcher.fetch(**opts)
31
+ end
32
+
20
33
  # @param xml [String]
21
34
  # @return [RelatonOgc::OgcBibliographicItem]
22
35
  def from_xml(xml)
@@ -13,7 +13,7 @@ module RelatonOgc
13
13
  "IPR" => { type: "engineering-report" },
14
14
  "IS" => { type: "standard", subtype: "implementation" },
15
15
  "ISC" => { type: "standard", subtype: "implementation" },
16
- "ISx" => { type: "standard", subtype: "extesion" },
16
+ "ISx" => { type: "standard", subtype: "extension" },
17
17
  "Notes" => { type: "other" },
18
18
  "ORM" => { type: "reference-model" },
19
19
  "PC" => { type: "standard", subtype: "profile" },
@@ -88,7 +88,7 @@ module RelatonOgc
88
88
  # @param stage [String]
89
89
  # @return [RelatonBib::DocumentStatus, NilClass]
90
90
  def fetch_status(stage)
91
- stage && RelatonBib::DocunentStatus.new(stage: stage)
91
+ stage && RelatonBib::DocumentStatus.new(stage: stage)
92
92
  end
93
93
 
94
94
  # @param identifier [String]
@@ -138,6 +138,8 @@ module RelatonOgc
138
138
  # @param date [String]
139
139
  # @return [Array<RelatonBib::BibliographicDate>]
140
140
  def fetch_date(date)
141
+ return [] unless date
142
+
141
143
  [RelatonBib::BibliographicDate.new(type: "published", on: date)]
142
144
  end
143
145
  end
@@ -1,3 +1,3 @@
1
1
  module RelatonOgc
2
- VERSION = "1.9.1".freeze
2
+ VERSION = "1.9.2".freeze
3
3
  end
data/lib/relaton_ogc.rb CHANGED
@@ -2,6 +2,7 @@ require "relaton_iso_bib"
2
2
  require "relaton_ogc/version"
3
3
  require "relaton_ogc/ogc_bibliographic_item"
4
4
  require "relaton_ogc/ogc_bibliography"
5
+ require "relaton_ogc/data_fetcher"
5
6
  require "relaton_ogc/hit_collection"
6
7
  require "relaton_ogc/scrapper"
7
8
  require "relaton_ogc/xml_parser"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-ogc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.1
4
+ version: 1.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-09-07 00:00:00.000000000 Z
11
+ date: 2021-09-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -175,6 +175,7 @@ files:
175
175
  - grammars/ogc.rng
176
176
  - grammars/reqt.rng
177
177
  - lib/relaton_ogc.rb
178
+ - lib/relaton_ogc/data_fetcher.rb
178
179
  - lib/relaton_ogc/editorial_group.rb
179
180
  - lib/relaton_ogc/hash_converter.rb
180
181
  - lib/relaton_ogc/hit.rb