relaton-ogc 1.8.0 → 1.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.rubocop.yml +1 -1
- data/README.adoc +20 -1
- data/bin/rspec +29 -0
- data/grammars/biblio.rng +1 -0
- data/grammars/isodoc.rng +72 -10
- data/grammars/ogc.rng +13 -15
- data/lib/relaton_ogc/data_fetcher.rb +98 -0
- data/lib/relaton_ogc/editorial_group.rb +1 -1
- data/lib/relaton_ogc/hash_converter.rb +1 -1
- data/lib/relaton_ogc/hit.rb +12 -1
- data/lib/relaton_ogc/hit_collection.rb +44 -59
- data/lib/relaton_ogc/ogc_bibliographic_item.rb +17 -21
- data/lib/relaton_ogc/ogc_bibliography.rb +2 -1
- data/lib/relaton_ogc/processor.rb +14 -1
- data/lib/relaton_ogc/scrapper.rb +6 -4
- data/lib/relaton_ogc/version.rb +1 -1
- data/lib/relaton_ogc/xml_parser.rb +9 -9
- data/lib/relaton_ogc.rb +1 -0
- data/relaton_ogc.gemspec +3 -4
- metadata +9 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 98e672b4b0d18f3feb9eb6d825d7111e40f3ee97d3886fb452c30c41f4a1dcc0
|
|
4
|
+
data.tar.gz: c9d939a8086f497f95390fd927bb1dee14986bf1b24825a4d0394a3467d9ec3c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: de8f29ea78226046ce2138ebacbe2625b0f2b07a6b402d24c3eb5ffb4f9dec2f88062ea185eeb34c04ee363f138664df040342257d04bdbc44a5b5dab3fb2497
|
|
7
|
+
data.tar.gz: d158cfaa42d1ea4cca7bc60463414723d7f78e81a0d8671a9fe647f8d17fb6cf88098d1d46085999a4712f7ffc989a6bf8ad65b20303f949ec1718774e02b3f9
|
data/.github/workflows/rake.yml
CHANGED
|
@@ -16,19 +16,9 @@ jobs:
|
|
|
16
16
|
strategy:
|
|
17
17
|
fail-fast: false
|
|
18
18
|
matrix:
|
|
19
|
-
ruby: [ '
|
|
19
|
+
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
|
20
20
|
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
|
21
21
|
experimental: [ false ]
|
|
22
|
-
include:
|
|
23
|
-
- ruby: '3.0'
|
|
24
|
-
os: 'ubuntu-latest'
|
|
25
|
-
experimental: true
|
|
26
|
-
- ruby: '3.0'
|
|
27
|
-
os: 'windows-latest'
|
|
28
|
-
experimental: true
|
|
29
|
-
- ruby: '3.0'
|
|
30
|
-
os: 'macos-latest'
|
|
31
|
-
experimental: true
|
|
32
22
|
steps:
|
|
33
23
|
- uses: actions/checkout@v2
|
|
34
24
|
with:
|
data/.rubocop.yml
CHANGED
data/README.adoc
CHANGED
|
@@ -39,7 +39,7 @@ require 'relaton_ogc'
|
|
|
39
39
|
=> true
|
|
40
40
|
|
|
41
41
|
hits = RelatonOgc::OgcBibliography.search("OGC 19-025r1")
|
|
42
|
-
=> <RelatonOgc::HitCollection:0x007fcc8e085ba8 @ref=OGC 19-025r1 @fetched=
|
|
42
|
+
=> <RelatonOgc::HitCollection:0x007fcc8e085ba8 @ref=OGC 19-025r1 @fetched=true>
|
|
43
43
|
|
|
44
44
|
tem = hits[0].fetch
|
|
45
45
|
=> #<RelatonOgc::OgcBibliographicItem:0x007fcab3cb4758
|
|
@@ -113,6 +113,25 @@ RelatonOgc::OgcBibliographicItem.from_hash hash
|
|
|
113
113
|
...
|
|
114
114
|
----
|
|
115
115
|
|
|
116
|
+
=== Fetch data
|
|
117
|
+
|
|
118
|
+
This gem uses the https://raw.githubusercontent.com/opengeospatial/NamingAuthority/master/incubation/bibliography/bibliography.json dataset as a data source.
|
|
119
|
+
|
|
120
|
+
The method `RelatonOgc::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
|
121
|
+
Arguments:
|
|
122
|
+
|
|
123
|
+
- `output` - folder to save documents (default './data').
|
|
124
|
+
- `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml` (default `yaml`).
|
|
125
|
+
|
|
126
|
+
[source,ruby]
|
|
127
|
+
----
|
|
128
|
+
RelatonOgc::DataFetcher.fetch
|
|
129
|
+
Started at: 2021-09-14 11:21:46 +0200
|
|
130
|
+
[relaton-ogc] WARNING Duplicated documents: 15-113r5, 08-094r1, 10-025r1, 12-128r14, 16-079, 16-007r3, 13-026r8, 12-128r12, 15-078r6, 12-176r7, 09-102r3, 14-095, 14-115, 07-147r2, 12-000, 12-006, 09-025r1, 07-036, 07-110r4, 03-105r1, 06-042, 07-165r1, 12-066, 06-104r4, 11-122r1, 09-000, 04-094, 07-006r1, 06-035r1, 03-006r3, 05-134, 04-021r3, 02-058, 01-009
|
|
131
|
+
Stopped at: 2021-09-14 11:21:48 +0200
|
|
132
|
+
=> nil
|
|
133
|
+
----
|
|
134
|
+
|
|
116
135
|
== Development
|
|
117
136
|
|
|
118
137
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/bin/rspec
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
#
|
|
5
|
+
# This file was generated by Bundler.
|
|
6
|
+
#
|
|
7
|
+
# The application 'rspec' is installed as part of a gem, and
|
|
8
|
+
# this file is here to facilitate running it.
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
require "pathname"
|
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
|
13
|
+
Pathname.new(__FILE__).realpath)
|
|
14
|
+
|
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
|
16
|
+
|
|
17
|
+
if File.file?(bundle_binstub)
|
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
|
19
|
+
load(bundle_binstub)
|
|
20
|
+
else
|
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
require "rubygems"
|
|
27
|
+
require "bundler/setup"
|
|
28
|
+
|
|
29
|
+
load Gem.bin_path("rspec-core", "rspec")
|
data/grammars/biblio.rng
CHANGED
data/grammars/isodoc.rng
CHANGED
|
@@ -45,6 +45,11 @@
|
|
|
45
45
|
<optional>
|
|
46
46
|
<attribute name="alt"/>
|
|
47
47
|
</optional>
|
|
48
|
+
<optional>
|
|
49
|
+
<attribute name="updatetype">
|
|
50
|
+
<data type="boolean"/>
|
|
51
|
+
</attribute>
|
|
52
|
+
</optional>
|
|
48
53
|
<text/>
|
|
49
54
|
</element>
|
|
50
55
|
</define>
|
|
@@ -199,6 +204,18 @@
|
|
|
199
204
|
</zeroOrMore>
|
|
200
205
|
</element>
|
|
201
206
|
</define>
|
|
207
|
+
<define name="dt">
|
|
208
|
+
<element name="dt">
|
|
209
|
+
<optional>
|
|
210
|
+
<attribute name="id">
|
|
211
|
+
<data type="ID"/>
|
|
212
|
+
</attribute>
|
|
213
|
+
</optional>
|
|
214
|
+
<zeroOrMore>
|
|
215
|
+
<ref name="TextElement"/>
|
|
216
|
+
</zeroOrMore>
|
|
217
|
+
</element>
|
|
218
|
+
</define>
|
|
202
219
|
<define name="example">
|
|
203
220
|
<element name="example">
|
|
204
221
|
<attribute name="id">
|
|
@@ -543,6 +560,9 @@
|
|
|
543
560
|
</define>
|
|
544
561
|
<define name="BibDataExtensionType">
|
|
545
562
|
<ref name="doctype"/>
|
|
563
|
+
<optional>
|
|
564
|
+
<ref name="docsubtype"/>
|
|
565
|
+
</optional>
|
|
546
566
|
<optional>
|
|
547
567
|
<ref name="editorialgroup"/>
|
|
548
568
|
</optional>
|
|
@@ -890,6 +910,14 @@
|
|
|
890
910
|
</define>
|
|
891
911
|
</include>
|
|
892
912
|
<!-- end overrides -->
|
|
913
|
+
<define name="docsubtype">
|
|
914
|
+
<element name="subdoctype">
|
|
915
|
+
<ref name="DocumentSubtype"/>
|
|
916
|
+
</element>
|
|
917
|
+
</define>
|
|
918
|
+
<define name="DocumentSubtype">
|
|
919
|
+
<text/>
|
|
920
|
+
</define>
|
|
893
921
|
<define name="colgroup">
|
|
894
922
|
<element name="colgroup">
|
|
895
923
|
<oneOrMore>
|
|
@@ -939,7 +967,34 @@
|
|
|
939
967
|
<define name="concept">
|
|
940
968
|
<element name="concept">
|
|
941
969
|
<optional>
|
|
942
|
-
<attribute name="
|
|
970
|
+
<attribute name="ital">
|
|
971
|
+
<data type="boolean"/>
|
|
972
|
+
</attribute>
|
|
973
|
+
</optional>
|
|
974
|
+
<optional>
|
|
975
|
+
<attribute name="ref">
|
|
976
|
+
<data type="boolean"/>
|
|
977
|
+
</attribute>
|
|
978
|
+
</optional>
|
|
979
|
+
<optional>
|
|
980
|
+
<element name="refterm">
|
|
981
|
+
<zeroOrMore>
|
|
982
|
+
<choice>
|
|
983
|
+
<ref name="PureTextElement"/>
|
|
984
|
+
<ref name="stem"/>
|
|
985
|
+
</choice>
|
|
986
|
+
</zeroOrMore>
|
|
987
|
+
</element>
|
|
988
|
+
</optional>
|
|
989
|
+
<optional>
|
|
990
|
+
<element name="renderterm">
|
|
991
|
+
<zeroOrMore>
|
|
992
|
+
<choice>
|
|
993
|
+
<ref name="PureTextElement"/>
|
|
994
|
+
<ref name="stem"/>
|
|
995
|
+
</choice>
|
|
996
|
+
</zeroOrMore>
|
|
997
|
+
</element>
|
|
943
998
|
</optional>
|
|
944
999
|
<choice>
|
|
945
1000
|
<ref name="eref"/>
|
|
@@ -965,6 +1020,9 @@
|
|
|
965
1020
|
</attribute>
|
|
966
1021
|
<attribute name="name"/>
|
|
967
1022
|
<attribute name="action"/>
|
|
1023
|
+
<optional>
|
|
1024
|
+
<attribute name="class"/>
|
|
1025
|
+
</optional>
|
|
968
1026
|
<zeroOrMore>
|
|
969
1027
|
<choice>
|
|
970
1028
|
<ref name="TextElement"/>
|
|
@@ -1191,13 +1249,17 @@
|
|
|
1191
1249
|
</define>
|
|
1192
1250
|
<define name="IsoWorkgroup">
|
|
1193
1251
|
<optional>
|
|
1194
|
-
<attribute name="number"
|
|
1195
|
-
<data type="int"/>
|
|
1196
|
-
</attribute>
|
|
1252
|
+
<attribute name="number"/>
|
|
1197
1253
|
</optional>
|
|
1198
1254
|
<optional>
|
|
1199
1255
|
<attribute name="type"/>
|
|
1200
1256
|
</optional>
|
|
1257
|
+
<optional>
|
|
1258
|
+
<attribute name="identifier"/>
|
|
1259
|
+
</optional>
|
|
1260
|
+
<optional>
|
|
1261
|
+
<attribute name="prefix"/>
|
|
1262
|
+
</optional>
|
|
1201
1263
|
<text/>
|
|
1202
1264
|
</define>
|
|
1203
1265
|
<define name="ics">
|
|
@@ -1459,26 +1521,26 @@
|
|
|
1459
1521
|
<optional>
|
|
1460
1522
|
<ref name="section-title"/>
|
|
1461
1523
|
</optional>
|
|
1462
|
-
<
|
|
1524
|
+
<choice>
|
|
1463
1525
|
<choice>
|
|
1464
1526
|
<group>
|
|
1465
|
-
<
|
|
1527
|
+
<oneOrMore>
|
|
1466
1528
|
<ref name="BasicBlock"/>
|
|
1467
|
-
</
|
|
1529
|
+
</oneOrMore>
|
|
1468
1530
|
<zeroOrMore>
|
|
1469
1531
|
<ref name="note"/>
|
|
1470
1532
|
</zeroOrMore>
|
|
1471
1533
|
</group>
|
|
1472
1534
|
<ref name="amend"/>
|
|
1473
1535
|
</choice>
|
|
1474
|
-
<
|
|
1536
|
+
<oneOrMore>
|
|
1475
1537
|
<choice>
|
|
1476
1538
|
<ref name="clause-subsection"/>
|
|
1477
1539
|
<ref name="terms"/>
|
|
1478
1540
|
<ref name="definitions"/>
|
|
1479
1541
|
</choice>
|
|
1480
|
-
</
|
|
1481
|
-
</
|
|
1542
|
+
</oneOrMore>
|
|
1543
|
+
</choice>
|
|
1482
1544
|
</define>
|
|
1483
1545
|
<define name="Annex-Section">
|
|
1484
1546
|
<optional>
|
data/grammars/ogc.rng
CHANGED
|
@@ -62,6 +62,19 @@
|
|
|
62
62
|
</optional>
|
|
63
63
|
</element>
|
|
64
64
|
</define>
|
|
65
|
+
<define name="DocumentSubtype">
|
|
66
|
+
<choice>
|
|
67
|
+
<value>conceptual-model</value>
|
|
68
|
+
<value>conceptual-model-and-encoding</value>
|
|
69
|
+
<value>conceptual-model-and-implementation</value>
|
|
70
|
+
<value>encoding</value>
|
|
71
|
+
<value>extension</value>
|
|
72
|
+
<value>implementation</value>
|
|
73
|
+
<value>profile</value>
|
|
74
|
+
<value>profile-with-extension</value>
|
|
75
|
+
<value>general</value>
|
|
76
|
+
</choice>
|
|
77
|
+
</define>
|
|
65
78
|
</include>
|
|
66
79
|
<define name="TextElement" combine="choice">
|
|
67
80
|
<ref name="hi"/>
|
|
@@ -79,21 +92,6 @@
|
|
|
79
92
|
</zeroOrMore>
|
|
80
93
|
</element>
|
|
81
94
|
</define>
|
|
82
|
-
<define name="docsubtype">
|
|
83
|
-
<element name="docsubtype">
|
|
84
|
-
<choice>
|
|
85
|
-
<value>conceptual-model</value>
|
|
86
|
-
<value>conceptual-model-and-encoding</value>
|
|
87
|
-
<value>conceptual-model-and-implementation</value>
|
|
88
|
-
<value>encoding</value>
|
|
89
|
-
<value>extension</value>
|
|
90
|
-
<value>implementation</value>
|
|
91
|
-
<value>profile</value>
|
|
92
|
-
<value>profile-with-extension</value>
|
|
93
|
-
<value>general</value>
|
|
94
|
-
</choice>
|
|
95
|
-
</element>
|
|
96
|
-
</define>
|
|
97
95
|
<define name="submitters">
|
|
98
96
|
<element name="submitters">
|
|
99
97
|
<ref name="Basic-Section"/>
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
module RelatonOgc
|
|
2
|
+
class DataFetcher
|
|
3
|
+
module Utils
|
|
4
|
+
ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
|
|
5
|
+
"NamingAuthority/master/incubation/bibliography/"\
|
|
6
|
+
"bibliography.json".freeze
|
|
7
|
+
|
|
8
|
+
def get_data # rubocop:disable Metrics/AbcSize
|
|
9
|
+
h = {}
|
|
10
|
+
h["If-None-Match"] = etag if etag
|
|
11
|
+
resp = Faraday.new(ENDPOINT, headers: h).get
|
|
12
|
+
case resp.status
|
|
13
|
+
when 200
|
|
14
|
+
json = JSON.parse(resp.body)
|
|
15
|
+
block_given? ? yield(resp[:etag], json) : json
|
|
16
|
+
when 304 then [] # there aren't any changes since last fetching
|
|
17
|
+
else raise RelatonBib::RequestError, "Could not access #{ENDPOINT}"
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
#
|
|
22
|
+
# Read ETag from file
|
|
23
|
+
#
|
|
24
|
+
# @return [String, NilClass]
|
|
25
|
+
def etag
|
|
26
|
+
@etag ||= if File.exist? @etagfile
|
|
27
|
+
File.read @etagfile, encoding: "UTF-8"
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
#
|
|
32
|
+
# Save ETag to file
|
|
33
|
+
#
|
|
34
|
+
# @param e_tag [String]
|
|
35
|
+
def etag=(e_tag)
|
|
36
|
+
File.write @etagfile, e_tag, encoding: "UTF-8"
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
include Utils
|
|
41
|
+
|
|
42
|
+
#
|
|
43
|
+
# Create DataFetcher instance
|
|
44
|
+
#
|
|
45
|
+
# @param [String] output directory to save the documents
|
|
46
|
+
# @param [String] format output format "yaml" or "xml"
|
|
47
|
+
#
|
|
48
|
+
def initialize(output, format)
|
|
49
|
+
@output = output
|
|
50
|
+
@etagfile = File.join output, "etag.txt"
|
|
51
|
+
@format = format
|
|
52
|
+
@docids = []
|
|
53
|
+
@dupids = []
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def self.fetch(output: "data", format: "yaml")
|
|
57
|
+
t1 = Time.now
|
|
58
|
+
puts "Started at: #{t1}"
|
|
59
|
+
FileUtils.mkdir_p output unless Dir.exist? output
|
|
60
|
+
new(output, format).fetch
|
|
61
|
+
t2 = Time.now
|
|
62
|
+
puts "Stopped at: #{t2}"
|
|
63
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def fetch # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
|
67
|
+
get_data do |etag, json|
|
|
68
|
+
no_errors = true
|
|
69
|
+
json.each do |_, hit|
|
|
70
|
+
next if hit["type"] == "CC"
|
|
71
|
+
|
|
72
|
+
bib = Scrapper.parse_page hit
|
|
73
|
+
write_document bib
|
|
74
|
+
rescue StandardError => e
|
|
75
|
+
no_errors = false
|
|
76
|
+
warn "Fetching document: #{hit['identifier']}"
|
|
77
|
+
warn "#{e.class} #{e.message}"
|
|
78
|
+
warn e.backtrace
|
|
79
|
+
end
|
|
80
|
+
warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.uniq.join(', ')}" if @dupids.any?
|
|
81
|
+
self.etag = etag if no_errors
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def write_document(bib) # rubocop:disable Metrics/AbcSize
|
|
86
|
+
if @docids.include?(bib.docidentifier[0].id)
|
|
87
|
+
@dupids << bib.docidentifier[0].id
|
|
88
|
+
return
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
@docids << bib.docidentifier[0].id
|
|
92
|
+
name = bib.docidentifier[0].id.upcase.gsub(/[\s:.]/, "_")
|
|
93
|
+
file = "#{@output}/#{name}.#{@format}"
|
|
94
|
+
content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
|
|
95
|
+
File.write file, content, encoding: "UTF-8"
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
data/lib/relaton_ogc/hit.rb
CHANGED
|
@@ -1,9 +1,20 @@
|
|
|
1
1
|
module RelatonOgc
|
|
2
2
|
class Hit < RelatonBib::Hit
|
|
3
|
+
#
|
|
4
|
+
# Create a hit that wraps an already parsed bibliographic item
|
|
5
|
+
#
|
|
6
|
+
# @param [RelatonOgc::OgcBibliographicItem] bibitem
|
|
7
|
+
# @param [RelatonOgc::HitCollection, nil] hitcoll
|
|
8
|
+
#
|
|
9
|
+
def initialize(bibitem, hitcoll = nil)
|
|
10
|
+
super({ id: bibitem.docidentifier[0].id}, hitcoll)
|
|
11
|
+
@fetch = bibitem
|
|
12
|
+
end
|
|
13
|
+
|
|
3
14
|
# Parse page.
|
|
4
15
|
# @return [RelatonOgc::OgcBibliographicItem]
|
|
5
16
|
def fetch
|
|
6
|
-
@fetch ||= Scrapper.parse_page @hit
|
|
17
|
+
@fetch # ||= Scrapper.parse_page @hit
|
|
7
18
|
end
|
|
8
19
|
end
|
|
9
20
|
end
|
|
@@ -4,86 +4,71 @@ require "fileutils"
|
|
|
4
4
|
|
|
5
5
|
module RelatonOgc
|
|
6
6
|
class HitCollection < RelatonBib::HitCollection
|
|
7
|
-
|
|
8
|
-
"NamingAuthority/master/incubation/bibliography/bibliography.json".freeze
|
|
9
|
-
DATADIR = File.expand_path ".relaton/ogc/", Dir.home
|
|
10
|
-
DATAFILE = File.expand_path "bibliography.json", DATADIR
|
|
11
|
-
ETAGFILE = File.expand_path "etag.txt", DATADIR
|
|
7
|
+
# include DataFetcher::Utils
|
|
12
8
|
|
|
13
|
-
#
|
|
9
|
+
# ENDPOINT = "https://raw.githubusercontent.com/opengeospatial/"\
|
|
10
|
+
# "NamingAuthority/master/incubation/bibliography/"\
|
|
11
|
+
# "bibliography.json".freeze
|
|
12
|
+
ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-ogc/main/data/".freeze
|
|
13
|
+
# DATADIR = File.expand_path ".relaton/ogc/", Dir.home
|
|
14
|
+
# DATAFILE = File.expand_path "bibliography.json", DATADIR
|
|
15
|
+
# ETAGFILE = File.expand_path "etag.txt", DATADIR
|
|
16
|
+
|
|
17
|
+
# @param code [String]
|
|
14
18
|
# @param year [String]
|
|
15
19
|
# @param opts [Hash]
|
|
16
|
-
def initialize(
|
|
20
|
+
def initialize(code, year = nil)
|
|
17
21
|
super
|
|
18
|
-
@
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
# @etagfile = File.expand_path "etag.txt", DATADIR
|
|
23
|
+
# @array = from_json(ref).sort_by do |hit|
|
|
24
|
+
# hit.hit["date"] ? Date.parse(hit.hit["date"]) : Date.new
|
|
25
|
+
# rescue ArgumentError
|
|
26
|
+
# Date.parse "0000-01-01"
|
|
27
|
+
# end.reverse
|
|
28
|
+
resp = Faraday.get "#{ENDPOINT}#{code.upcase.gsub(/[\s:.]/, '_')}.yaml"
|
|
29
|
+
@array = case resp.status
|
|
30
|
+
when 200
|
|
31
|
+
bib = OgcBibliographicItem.from_hash YAML.safe_load(resp.body)
|
|
32
|
+
[Hit.new(bib, self)]
|
|
33
|
+
else []
|
|
34
|
+
end
|
|
25
35
|
end
|
|
26
36
|
|
|
27
|
-
private
|
|
37
|
+
# private
|
|
28
38
|
|
|
29
39
|
#
|
|
30
40
|
# Fetch data from json
|
|
31
41
|
#
|
|
32
42
|
# @param docid [String]
|
|
33
|
-
def from_json(docid, **_opts)
|
|
34
|
-
|
|
35
|
-
|
|
43
|
+
# def from_json(docid, **_opts)
|
|
44
|
+
# ref = docid.sub(/^OGC\s/, "").strip
|
|
45
|
+
# return [] if ref.empty?
|
|
36
46
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
end
|
|
47
|
+
# data.select do |_k, doc|
|
|
48
|
+
# doc["type"] != "CC" && doc["identifier"].include?(ref)
|
|
49
|
+
# end.map { |_k, h| Hit.new(h, self) }
|
|
50
|
+
# end
|
|
41
51
|
|
|
42
52
|
#
|
|
43
53
|
# Fetches json data
|
|
44
54
|
#
|
|
45
55
|
# @return [Hash]
|
|
46
|
-
def data
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
end
|
|
56
|
+
# def data
|
|
57
|
+
# ctime = File.ctime DATAFILE if File.exist? DATAFILE
|
|
58
|
+
# fetch_data if !ctime || ctime.to_date < Date.today
|
|
59
|
+
# @data ||= JSON.parse File.read(DATAFILE, encoding: "UTF-8")
|
|
60
|
+
# end
|
|
51
61
|
|
|
52
62
|
#
|
|
53
63
|
# fetch data from server and save it to file.
|
|
54
64
|
#
|
|
55
|
-
def fetch_data
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
resp = Faraday.new(ENDPOINT, headers: h).get
|
|
59
|
-
# return if there aren't any changes since last fetching
|
|
60
|
-
return if resp.status == 304
|
|
61
|
-
unless resp.status == 200
|
|
62
|
-
raise RelatonBib::RequestError, "Could not access #{ENDPOINT}"
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
FileUtils.mkdir_p DATADIR unless Dir.exist? DATADIR
|
|
66
|
-
self.etag = resp[:etag]
|
|
67
|
-
@data = JSON.parse resp.body
|
|
68
|
-
File.write DATAFILE, @data.to_json, encoding: "UTF-8"
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
#
|
|
72
|
-
# Read ETag form file
|
|
73
|
-
#
|
|
74
|
-
# @return [String, NilClass]
|
|
75
|
-
def etag
|
|
76
|
-
@etag ||= if File.exist? ETAGFILE
|
|
77
|
-
File.read ETAGFILE, encoding: "UTF-8"
|
|
78
|
-
end
|
|
79
|
-
end
|
|
65
|
+
# def fetch_data
|
|
66
|
+
# json = get_data
|
|
67
|
+
# return unless json
|
|
80
68
|
|
|
81
|
-
#
|
|
82
|
-
#
|
|
83
|
-
#
|
|
84
|
-
#
|
|
85
|
-
def etag=(e_tag)
|
|
86
|
-
File.write ETAGFILE, e_tag, encoding: "UTF-8"
|
|
87
|
-
end
|
|
69
|
+
# FileUtils.mkdir_p DATADIR unless Dir.exist? DATADIR
|
|
70
|
+
# @data = json
|
|
71
|
+
# File.write DATAFILE, @data.to_json, encoding: "UTF-8"
|
|
72
|
+
# end
|
|
88
73
|
end
|
|
89
74
|
end
|
|
@@ -14,17 +14,13 @@ module RelatonOgc
|
|
|
14
14
|
profile profile-with-extension general
|
|
15
15
|
].freeze
|
|
16
16
|
|
|
17
|
-
# @return [String]
|
|
18
|
-
attr_reader :docsubtype
|
|
19
|
-
|
|
20
|
-
# @param docsubtype [String]
|
|
21
17
|
def initialize(**args)
|
|
22
|
-
if args[:
|
|
18
|
+
if args[:subdoctype] && !SUBTYPES.include?(args[:subdoctype])
|
|
23
19
|
warn "[relaton-ogc] WARNING: invalid document "\
|
|
24
|
-
|
|
20
|
+
"subtype: #{args[:subdoctype]}"
|
|
25
21
|
end
|
|
26
22
|
|
|
27
|
-
@docsubtype = args.delete :docsubtype
|
|
23
|
+
# @docsubtype = args.delete :docsubtype
|
|
28
24
|
# @doctype = args.delete :doctype
|
|
29
25
|
super
|
|
30
26
|
end
|
|
@@ -33,15 +29,15 @@ module RelatonOgc
|
|
|
33
29
|
# @return [RelatonOgc::OgcBibliographicItem]
|
|
34
30
|
def self.from_hash(hash)
|
|
35
31
|
item_hash = ::RelatonOgc::HashConverter.hash_to_bib(hash)
|
|
36
|
-
new
|
|
32
|
+
new(**item_hash)
|
|
37
33
|
end
|
|
38
34
|
|
|
39
35
|
# @return [Hash]
|
|
40
|
-
def to_hash
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
end
|
|
36
|
+
# def to_hash
|
|
37
|
+
# hash = super
|
|
38
|
+
# hash["docsubtype"] = docsubtype if docsubtype
|
|
39
|
+
# hash
|
|
40
|
+
# end
|
|
45
41
|
|
|
46
42
|
# @param opts [Hash]
|
|
47
43
|
# @option opts [Nokogiri::XML::Builder] :builder XML builder
|
|
@@ -50,10 +46,10 @@ module RelatonOgc
|
|
|
50
46
|
# @option opts [String, Symbol] :lang language
|
|
51
47
|
# @return [String] XML
|
|
52
48
|
def to_xml(**opts)
|
|
53
|
-
super
|
|
49
|
+
super(**opts) do |b|
|
|
54
50
|
b.ext do
|
|
55
51
|
b.doctype doctype if doctype
|
|
56
|
-
b.
|
|
52
|
+
b.subdoctype subdoctype if subdoctype
|
|
57
53
|
editorialgroup&.to_xml b
|
|
58
54
|
ics.each { |i| i.to_xml b }
|
|
59
55
|
end
|
|
@@ -62,11 +58,11 @@ module RelatonOgc
|
|
|
62
58
|
|
|
63
59
|
# @param prefix [String]
|
|
64
60
|
# @return [String]
|
|
65
|
-
def to_asciibib(prefix = "")
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
end
|
|
61
|
+
# def to_asciibib(prefix = "")
|
|
62
|
+
# pref = prefix.empty? ? prefix : prefix + "."
|
|
63
|
+
# out = super
|
|
64
|
+
# out += "#{pref}docsubtype:: #{docsubtype}\n" if docsubtype
|
|
65
|
+
# out
|
|
66
|
+
# end
|
|
71
67
|
end
|
|
72
68
|
end
|
|
@@ -4,7 +4,8 @@ module RelatonOgc
|
|
|
4
4
|
# @param text [String]
|
|
5
5
|
# @return [RelatonOgc::HitCollection]
|
|
6
6
|
def search(text, year = nil, _opts = {})
|
|
7
|
-
|
|
7
|
+
code = text.sub(/^OGC\s/, "")
|
|
8
|
+
HitCollection.new code, year
|
|
8
9
|
rescue Faraday::ConnectionFailed
|
|
9
10
|
raise RelatonBib::RequestError, HitCollection::ENDPOINT
|
|
10
11
|
end
|
|
@@ -2,11 +2,12 @@ require "relaton/processor"
|
|
|
2
2
|
|
|
3
3
|
module RelatonOgc
|
|
4
4
|
class Processor < Relaton::Processor
|
|
5
|
-
def initialize
|
|
5
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
|
6
6
|
@short = :relaton_ogc
|
|
7
7
|
@prefix = "OGC"
|
|
8
8
|
@defaultprefix = %r{^OGC\s}
|
|
9
9
|
@idtype = "OGC"
|
|
10
|
+
@datasets = %w[ogc-naming-authority]
|
|
10
11
|
end
|
|
11
12
|
|
|
12
13
|
# @param code [String]
|
|
@@ -17,6 +18,18 @@ module RelatonOgc
|
|
|
17
18
|
::RelatonOgc::OgcBibliography.get(code, date, opts)
|
|
18
19
|
end
|
|
19
20
|
|
|
21
|
+
#
|
|
22
|
+
# Fetch all the documents from a source
|
|
23
|
+
#
|
|
24
|
+
# @param [String] _source source name
|
|
25
|
+
# @param [Hash] opts
|
|
26
|
+
# @option opts [String] :output directory to output documents
|
|
27
|
+
# @option opts [String] :format
|
|
28
|
+
#
|
|
29
|
+
def fetch_data(_source, opts)
|
|
30
|
+
DataFetcher.fetch(**opts)
|
|
31
|
+
end
|
|
32
|
+
|
|
20
33
|
# @param xml [String]
|
|
21
34
|
# @return [RelatonOgc::OgcBibliographicItem]
|
|
22
35
|
def from_xml(xml)
|
data/lib/relaton_ogc/scrapper.rb
CHANGED
|
@@ -13,7 +13,7 @@ module RelatonOgc
|
|
|
13
13
|
"IPR" => { type: "engineering-report" },
|
|
14
14
|
"IS" => { type: "standard", subtype: "implementation" },
|
|
15
15
|
"ISC" => { type: "standard", subtype: "implementation" },
|
|
16
|
-
"ISx" => { type: "standard", subtype: "
|
|
16
|
+
"ISx" => { type: "standard", subtype: "extension" },
|
|
17
17
|
"Notes" => { type: "other" },
|
|
18
18
|
"ORM" => { type: "reference-model" },
|
|
19
19
|
"PC" => { type: "standard", subtype: "profile" },
|
|
@@ -34,7 +34,7 @@ module RelatonOgc
|
|
|
34
34
|
class << self
|
|
35
35
|
# @param hit [Hash]
|
|
36
36
|
# @return [RelatonOgc::OgcBibliographicItem]
|
|
37
|
-
def parse_page(hit)
|
|
37
|
+
def parse_page(hit) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
|
38
38
|
type = fetch_type(hit["type"])
|
|
39
39
|
OgcBibliographicItem.new(
|
|
40
40
|
fetched: Date.today.to_s,
|
|
@@ -43,7 +43,7 @@ module RelatonOgc
|
|
|
43
43
|
docid: fetch_docid(hit["identifier"]),
|
|
44
44
|
link: fetch_link(hit["URL"]),
|
|
45
45
|
doctype: type[:type],
|
|
46
|
-
|
|
46
|
+
subdoctype: type[:subtype],
|
|
47
47
|
docstatus: fetch_status(type[:stage]),
|
|
48
48
|
edition: fetch_edition(hit["identifier"]),
|
|
49
49
|
abstract: fetch_abstract(hit["description"]),
|
|
@@ -88,7 +88,7 @@ module RelatonOgc
|
|
|
88
88
|
# @param stage [String]
|
|
89
89
|
# @return [RelatonBib::DocumentStatus, NilClass]
|
|
90
90
|
def fetch_status(stage)
|
|
91
|
-
stage && RelatonBib::
|
|
91
|
+
stage && RelatonBib::DocumentStatus.new(stage: stage)
|
|
92
92
|
end
|
|
93
93
|
|
|
94
94
|
# @param identifier [String]
|
|
@@ -138,6 +138,8 @@ module RelatonOgc
|
|
|
138
138
|
# @param date [String]
|
|
139
139
|
# @return [Array<RelatonBib::BibliographicDate>]
|
|
140
140
|
def fetch_date(date)
|
|
141
|
+
return [] unless date
|
|
142
|
+
|
|
141
143
|
[RelatonBib::BibliographicDate.new(type: "published", on: date)]
|
|
142
144
|
end
|
|
143
145
|
end
|
data/lib/relaton_ogc/version.rb
CHANGED
|
@@ -9,20 +9,20 @@ module RelatonOgc
|
|
|
9
9
|
# @param item_hash [Hash]
|
|
10
10
|
# @return [RelatonOgc::OgcBibliographicItem]
|
|
11
11
|
def bib_item(item_hash)
|
|
12
|
-
OgcBibliographicItem.new
|
|
12
|
+
OgcBibliographicItem.new(**item_hash)
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
# Override RelatonIsoBib::XMLParser.item_data method.
|
|
16
16
|
# @param item [Nokogiri::XML::Element]
|
|
17
17
|
# @return [Hash]
|
|
18
|
-
def item_data(item)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
18
|
+
# def item_data(item)
|
|
19
|
+
# data = super
|
|
20
|
+
# ext = item.at "./ext"
|
|
21
|
+
# return data unless ext
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
end
|
|
23
|
+
# data[:docsubtype] = ext.at("./docsubtype")&.text
|
|
24
|
+
# data
|
|
25
|
+
# end
|
|
26
26
|
|
|
27
27
|
# @TODO Organization doesn't recreated
|
|
28
28
|
# @param ext [Nokogiri::XML::Element]
|
|
@@ -35,7 +35,7 @@ module RelatonOgc
|
|
|
35
35
|
sc = iso_subgroup eg&.at("subcommittee")
|
|
36
36
|
wg = iso_subgroup eg&.at("workgroup")
|
|
37
37
|
EditorialGroup.new(
|
|
38
|
-
committee: committe, subcommittee: sc, workgroup: wg
|
|
38
|
+
committee: committe, subcommittee: sc, workgroup: wg,
|
|
39
39
|
)
|
|
40
40
|
end
|
|
41
41
|
end
|
data/lib/relaton_ogc.rb
CHANGED
|
@@ -2,6 +2,7 @@ require "relaton_iso_bib"
|
|
|
2
2
|
require "relaton_ogc/version"
|
|
3
3
|
require "relaton_ogc/ogc_bibliographic_item"
|
|
4
4
|
require "relaton_ogc/ogc_bibliography"
|
|
5
|
+
require "relaton_ogc/data_fetcher"
|
|
5
6
|
require "relaton_ogc/hit_collection"
|
|
6
7
|
require "relaton_ogc/scrapper"
|
|
7
8
|
require "relaton_ogc/xml_parser"
|
data/relaton_ogc.gemspec
CHANGED
|
@@ -23,18 +23,17 @@ Gem::Specification.new do |spec|
|
|
|
23
23
|
spec.bindir = "exe"
|
|
24
24
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
25
25
|
spec.require_paths = ["lib"]
|
|
26
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
|
|
26
27
|
|
|
27
|
-
# spec.add_development_dependency "debase"
|
|
28
28
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
|
29
29
|
spec.add_development_dependency "pry-byebug"
|
|
30
|
-
spec.add_development_dependency "rake", "~>
|
|
30
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
|
31
31
|
spec.add_development_dependency "rspec", "~> 3.0"
|
|
32
|
-
# spec.add_development_dependency "ruby-debug-ide"
|
|
33
32
|
spec.add_development_dependency "ruby-jing"
|
|
34
33
|
spec.add_development_dependency "simplecov"
|
|
35
34
|
spec.add_development_dependency "vcr"
|
|
36
35
|
spec.add_development_dependency "webmock"
|
|
37
36
|
|
|
38
37
|
spec.add_dependency "faraday", "~> 1.1"
|
|
39
|
-
spec.add_dependency "relaton-iso-bib", "~> 1.
|
|
38
|
+
spec.add_dependency "relaton-iso-bib", "~> 1.9.0"
|
|
40
39
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-ogc
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.9.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-
|
|
11
|
+
date: 2021-09-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: equivalent-xml
|
|
@@ -44,14 +44,14 @@ dependencies:
|
|
|
44
44
|
requirements:
|
|
45
45
|
- - "~>"
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: '
|
|
47
|
+
version: '13.0'
|
|
48
48
|
type: :development
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: '
|
|
54
|
+
version: '13.0'
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: rspec
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -142,14 +142,14 @@ dependencies:
|
|
|
142
142
|
requirements:
|
|
143
143
|
- - "~>"
|
|
144
144
|
- !ruby/object:Gem::Version
|
|
145
|
-
version: 1.
|
|
145
|
+
version: 1.9.0
|
|
146
146
|
type: :runtime
|
|
147
147
|
prerelease: false
|
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
|
149
149
|
requirements:
|
|
150
150
|
- - "~>"
|
|
151
151
|
- !ruby/object:Gem::Version
|
|
152
|
-
version: 1.
|
|
152
|
+
version: 1.9.0
|
|
153
153
|
description: 'RelatonOgc: retrieve OGC Standards for bibliographic use using the OgcBibliographicItem
|
|
154
154
|
model'
|
|
155
155
|
email:
|
|
@@ -167,6 +167,7 @@ files:
|
|
|
167
167
|
- README.adoc
|
|
168
168
|
- Rakefile
|
|
169
169
|
- bin/console
|
|
170
|
+
- bin/rspec
|
|
170
171
|
- bin/setup
|
|
171
172
|
- grammars/basicdoc.rng
|
|
172
173
|
- grammars/biblio.rng
|
|
@@ -174,6 +175,7 @@ files:
|
|
|
174
175
|
- grammars/ogc.rng
|
|
175
176
|
- grammars/reqt.rng
|
|
176
177
|
- lib/relaton_ogc.rb
|
|
178
|
+
- lib/relaton_ogc/data_fetcher.rb
|
|
177
179
|
- lib/relaton_ogc/editorial_group.rb
|
|
178
180
|
- lib/relaton_ogc/hash_converter.rb
|
|
179
181
|
- lib/relaton_ogc/hit.rb
|
|
@@ -197,7 +199,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
197
199
|
requirements:
|
|
198
200
|
- - ">="
|
|
199
201
|
- !ruby/object:Gem::Version
|
|
200
|
-
version:
|
|
202
|
+
version: 2.5.0
|
|
201
203
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
202
204
|
requirements:
|
|
203
205
|
- - ">="
|