relaton-jis 2.0.0.pre.alpha.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/grammars/basicdoc.rng +14 -1
- data/grammars/biblio.rng +8 -8
- data/lib/relaton/jis/data_fetcher.rb +2 -5
- data/lib/relaton/jis/hit_collection.rb +1 -1
- data/lib/relaton/jis/scraper.rb +52 -23
- data/lib/relaton/jis/version.rb +1 -1
- data/relaton_jis.gemspec +2 -1
- metadata +17 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ac14dd89a5e207e3de88b287d2fa8a7a4b18fee184eca59e6083fc9733138c1a
|
|
4
|
+
data.tar.gz: 6c38de3d1235a9ab8cb1a78c2aa3c8d82930049f02733923650b7c1ee3a73d3c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a24537ee87d90330d588c1e7018b211af83973c94c90dad55cc28825f625d8aa20fd6a6ddc086771e7b05076f3f2310d9ac22bd6c1fdfe4343fae631da10bd6e
|
|
7
|
+
data.tar.gz: 7fecb2b10ff53ae3bfefcfa75bfe91d059c641a7eaef69ebf73d6530801287625904876b493e5e3b2099d58ecd36730a3bc72d1c67190c132f879cb144f74f03
|
data/grammars/basicdoc.rng
CHANGED
|
@@ -187,6 +187,15 @@ Applicable to modify and delete</a:documentation>
|
|
|
187
187
|
<a:documentation>Optional caption of this block</a:documentation>
|
|
188
188
|
</attribute>
|
|
189
189
|
</optional>
|
|
190
|
+
<optional>
|
|
191
|
+
<attribute name="position">
|
|
192
|
+
<a:documentation>For an "add" change, whether the change is added before or after the location</a:documentation>
|
|
193
|
+
<choice>
|
|
194
|
+
<value>before</value>
|
|
195
|
+
<value>after</value>
|
|
196
|
+
</choice>
|
|
197
|
+
</attribute>
|
|
198
|
+
</optional>
|
|
190
199
|
<optional>
|
|
191
200
|
<element name="location">
|
|
192
201
|
<a:documentation>The location(s) in the original document which have undergone the change described in this block</a:documentation>
|
|
@@ -208,11 +217,15 @@ Applicable to modify and delete</a:documentation>
|
|
|
208
217
|
</zeroOrMore>
|
|
209
218
|
<optional>
|
|
210
219
|
<element name="newcontent">
|
|
211
|
-
<a:documentation>New content to be added to the document; applicable to add and modify
|
|
220
|
+
<a:documentation>New content to be added to the document; applicable to add and modify.
|
|
221
|
+
Can be blocks and/or sections</a:documentation>
|
|
212
222
|
<ref name="OptionalId"/>
|
|
213
223
|
<zeroOrMore>
|
|
214
224
|
<ref name="BasicBlock"/>
|
|
215
225
|
</zeroOrMore>
|
|
226
|
+
<zeroOrMore>
|
|
227
|
+
<ref name="section"/>
|
|
228
|
+
</zeroOrMore>
|
|
216
229
|
</element>
|
|
217
230
|
</optional>
|
|
218
231
|
<zeroOrMore>
|
data/grammars/biblio.rng
CHANGED
|
@@ -1142,11 +1142,11 @@ NOTE: This should preferably be encoded as a URI or short identifier, rather th
|
|
|
1142
1142
|
<a:documentation>Information about how long the current description of the bibliographic item is valid for</a:documentation>
|
|
1143
1143
|
</ref>
|
|
1144
1144
|
</optional>
|
|
1145
|
-
<
|
|
1145
|
+
<zeroOrMore>
|
|
1146
1146
|
<ref name="depiction">
|
|
1147
1147
|
<a:documentation>Depiction of the bibliographic item, typically an image</a:documentation>
|
|
1148
1148
|
</ref>
|
|
1149
|
-
</
|
|
1149
|
+
</zeroOrMore>
|
|
1150
1150
|
</define>
|
|
1151
1151
|
<define name="ReducedBibliographicItem">
|
|
1152
1152
|
<a:documentation>Reduced description of a bibliographic resource, without mandatory title and docidentifier, used for document relations
|
|
@@ -1939,10 +1939,10 @@ Detailed in https://www.relaton.org/model/relations/</a:documentation>
|
|
|
1939
1939
|
<value>hasAnnotation</value>
|
|
1940
1940
|
<value>draftOf</value>
|
|
1941
1941
|
<value>hasDraft</value>
|
|
1942
|
-
<value>
|
|
1943
|
-
<value>
|
|
1944
|
-
<value>
|
|
1945
|
-
<value>
|
|
1942
|
+
<value>predecessorDraftOf</value>
|
|
1943
|
+
<value>hasPredecessorDraft</value>
|
|
1944
|
+
<value>successorDraftOf</value>
|
|
1945
|
+
<value>hasSuccessorDraft</value>
|
|
1946
1946
|
<value>editionOf</value>
|
|
1947
1947
|
<value>hasEdition</value>
|
|
1948
1948
|
<value>updates</value>
|
|
@@ -2063,13 +2063,13 @@ provided that it is not the entire bibliographic item that is so related</a:docu
|
|
|
2063
2063
|
<ref name="LocalizedString"/>
|
|
2064
2064
|
</element>
|
|
2065
2065
|
</optional>
|
|
2066
|
-
<
|
|
2066
|
+
<zeroOrMore>
|
|
2067
2067
|
<element name="taxon">
|
|
2068
2068
|
<a:documentation>The keywords as a hierarchical taxonomy. For example, the sequence of `taxon` elements
|
|
2069
2069
|
`pump`, `centrifugal pump`, `line shaft pump` represents a taxonomic classification</a:documentation>
|
|
2070
2070
|
<ref name="LocalizedString"/>
|
|
2071
2071
|
</element>
|
|
2072
|
-
</
|
|
2072
|
+
</zeroOrMore>
|
|
2073
2073
|
<zeroOrMore>
|
|
2074
2074
|
<ref name="vocabid">
|
|
2075
2075
|
<a:documentation>Identifiers for the keyword as a controlled vocabulary</a:documentation>
|
|
data/lib/relaton/jis/data_fetcher.rb
CHANGED
|
@@ -15,10 +15,6 @@ module Relaton
|
|
|
15
15
|
@mutex = Mutex.new
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
def gh_issue_channel
|
|
19
|
-
["relaton/relaton-jis", "Error fetching JIS documents"]
|
|
20
|
-
end
|
|
21
|
-
|
|
22
18
|
def log_error(msg)
|
|
23
19
|
Util.error msg
|
|
24
20
|
end
|
|
@@ -52,7 +48,7 @@ module Relaton
|
|
|
52
48
|
def fetch_doc(url) # rubocop:disable Metrics/MethodLength
|
|
53
49
|
attempts = 0
|
|
54
50
|
begin
|
|
55
|
-
bib = Scraper.new(url).fetch
|
|
51
|
+
bib = Scraper.new(url, @errors).fetch
|
|
56
52
|
rescue StandardError => e
|
|
57
53
|
attempts += 1
|
|
58
54
|
if attempts < 5
|
|
@@ -73,6 +69,7 @@ module Relaton
|
|
|
73
69
|
resp = agent.get "#{URL}W11M0070/index"
|
|
74
70
|
parse_page resp
|
|
75
71
|
index.save
|
|
72
|
+
report_errors
|
|
76
73
|
end
|
|
77
74
|
|
|
78
75
|
def initial_post
|
|
data/lib/relaton/jis/hit_collection.rb
CHANGED
|
@@ -74,7 +74,7 @@ module Relaton
|
|
|
74
74
|
content: hit.hit[:id], type: "JIS", primary: true,
|
|
75
75
|
)
|
|
76
76
|
bibitem = Bib::ItemData.new(
|
|
77
|
-
formattedref: hit.hit[:id], docidentifier: [docid],
|
|
77
|
+
formattedref: Bib::Formattedref.new(content: hit.hit[:id]), docidentifier: [docid],
|
|
78
78
|
)
|
|
79
79
|
Bib::Relation.new(type: "instanceOf", bibitem: bibitem)
|
|
80
80
|
end
|
data/lib/relaton/jis/scraper.rb
CHANGED
|
@@ -15,9 +15,10 @@ module Relaton
|
|
|
15
15
|
DATETYPES = { "発行年月日" => "issued", "確認年月日" => "confirmed" }.freeze
|
|
16
16
|
STATUSES = { "有効" => "valid", "廃止" => "withdrawn" }.freeze
|
|
17
17
|
|
|
18
|
-
def initialize(url)
|
|
18
|
+
def initialize(url, errors = {})
|
|
19
19
|
@url = url
|
|
20
20
|
@agent = Mechanize.new
|
|
21
|
+
@errors = errors
|
|
21
22
|
end
|
|
22
23
|
|
|
23
24
|
def fetch # rubocop:disable Metrics/MethodLength
|
|
@@ -25,18 +26,18 @@ module Relaton
|
|
|
25
26
|
contributors = fetch_contributor
|
|
26
27
|
eg_contributor = fetch_editorialgroup_contributor
|
|
27
28
|
contributors << eg_contributor if eg_contributor
|
|
28
|
-
attrs = ATTRS.each_with_object({}) do |attr, hash|
|
|
29
|
-
hash[attr] = send "fetch_#{attr}"
|
|
30
|
-
end
|
|
29
|
+
attrs = ATTRS.to_h { |attr| [attr, send("fetch_#{attr}")] }
|
|
31
30
|
attrs[:contributor] = contributors
|
|
32
31
|
Bib::ItemData.new(**attrs)
|
|
33
32
|
end
|
|
34
33
|
|
|
35
34
|
def fetch_title
|
|
36
|
-
{ "ja" => "Jpan", "en" => "Latn" }.map.with_index do |(lang, script), i|
|
|
35
|
+
result = { "ja" => "Jpan", "en" => "Latn" }.map.with_index do |(lang, script), i|
|
|
37
36
|
content = @doc.at("./h2/text()[#{i + 2}]").text.strip
|
|
38
37
|
Bib::Title.new content: content, language: lang, script: script
|
|
39
38
|
end
|
|
39
|
+
@errors[:title] &&= result.empty?
|
|
40
|
+
result
|
|
40
41
|
end
|
|
41
42
|
|
|
42
43
|
def fetch_source # rubocop:disable Metrics/MethodLength
|
|
@@ -44,45 +45,60 @@ module Relaton
|
|
|
44
45
|
uri = URI @url
|
|
45
46
|
domain = "#{uri.scheme}://#{uri.host}"
|
|
46
47
|
xpath = "./dl/dt[.='プレビュー']/following-sibling::dd[1]/a"
|
|
47
|
-
@doc.xpath(xpath).reduce([src]) do |mem, node|
|
|
48
|
+
result = @doc.xpath(xpath).reduce([src]) do |mem, node|
|
|
48
49
|
href = "#{domain}#{node[:href]}"
|
|
49
50
|
mem << Bib::Uri.new(content: href, type: "pdf")
|
|
50
51
|
end
|
|
52
|
+
@errors[:source] &&= result.empty?
|
|
53
|
+
result
|
|
51
54
|
end
|
|
52
55
|
|
|
53
56
|
def fetch_abstract
|
|
54
|
-
@doc.xpath("//div[@id='honbun']").map do |node|
|
|
55
|
-
Bib::
|
|
57
|
+
result = @doc.xpath("//div[@id='honbun']").map do |node|
|
|
58
|
+
Bib::Abstract.new(
|
|
56
59
|
content: node.text.strip,
|
|
57
60
|
language: "ja", script: "Jpan"
|
|
58
61
|
)
|
|
59
62
|
end
|
|
63
|
+
@errors[:abstract] &&= result.empty?
|
|
64
|
+
result
|
|
60
65
|
end
|
|
61
66
|
|
|
62
67
|
def fetch_docidentifier
|
|
63
68
|
docid = document_id
|
|
69
|
+
@errors[:docidentifier] &&= docid.nil? || docid.empty?
|
|
70
|
+
return [] if docid.nil? || docid.empty?
|
|
71
|
+
|
|
64
72
|
[Docidentifier.new(
|
|
65
73
|
content: docid, type: "JIS", primary: true,
|
|
66
74
|
)]
|
|
67
75
|
end
|
|
68
76
|
|
|
69
77
|
def fetch_docnumber
|
|
70
|
-
|
|
78
|
+
docid = document_id
|
|
79
|
+
match = docid&.match(/^\w+\s(\w)\s?(\d+)/)
|
|
80
|
+
@errors[:docnumber] &&= match.nil?
|
|
81
|
+
return unless match
|
|
82
|
+
|
|
71
83
|
"#{match[1]}#{match[2]}"
|
|
72
84
|
end
|
|
73
85
|
|
|
74
86
|
def document_id
|
|
75
|
-
@document_id ||= @doc.at("./h2/text()[1]")
|
|
87
|
+
@document_id ||= @doc.at("./h2/text()[1]")&.text&.strip
|
|
76
88
|
end
|
|
77
89
|
|
|
78
90
|
def fetch_date
|
|
79
|
-
DATETYPES.each_with_object([]) do |(key, type), a|
|
|
91
|
+
result = DATETYPES.each_with_object([]) do |(key, type), a|
|
|
80
92
|
node = @doc.at("./div/div/div/p/text()[contains(.,'#{key}')]")
|
|
81
93
|
next unless node
|
|
82
94
|
|
|
83
95
|
at = node.text.match(/\d{4}-\d{2}-\d{2}/).to_s
|
|
96
|
+
next if at.empty?
|
|
97
|
+
|
|
84
98
|
a << Bib::Date.new(type: type, at: at)
|
|
85
99
|
end
|
|
100
|
+
@errors[:date] &&= result.empty?
|
|
101
|
+
result
|
|
86
102
|
end
|
|
87
103
|
|
|
88
104
|
def fetch_type
|
|
@@ -97,15 +113,19 @@ module Relaton
|
|
|
97
113
|
langs_scripts.map { |l| l[:script] }
|
|
98
114
|
end
|
|
99
115
|
|
|
100
|
-
def langs_scripts
|
|
101
|
-
@langs_scripts ||=
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
116
|
+
def langs_scripts # rubocop:disable Metrics/MethodLength
|
|
117
|
+
@langs_scripts ||= begin
|
|
118
|
+
result = LANGS.each_with_object([]) do |(key, lang), a|
|
|
119
|
+
l = @doc.at(
|
|
120
|
+
"./div/div/div[@class='blockContentFile']/div/div/p[1]" \
|
|
121
|
+
"/span[contains(.,'#{key}')]/following-sibling::span",
|
|
122
|
+
)
|
|
123
|
+
next if l.nil? || l.text.strip == "-"
|
|
107
124
|
|
|
108
|
-
|
|
125
|
+
a << lang
|
|
126
|
+
end
|
|
127
|
+
@errors[:language] &&= result.empty?
|
|
128
|
+
result
|
|
109
129
|
end
|
|
110
130
|
end
|
|
111
131
|
|
|
@@ -113,24 +133,30 @@ module Relaton
|
|
|
113
133
|
xpath = "./div/div/div/p/text()[contains(.,'状態')]" \
|
|
114
134
|
"/following-sibling::span"
|
|
115
135
|
st = @doc.at(xpath)
|
|
116
|
-
|
|
136
|
+
status_val = STATUSES[st&.text&.strip]
|
|
137
|
+
@errors[:status] &&= status_val.nil?
|
|
138
|
+
return unless status_val
|
|
117
139
|
|
|
118
|
-
stage = Bib::Status::Stage.new(content:
|
|
140
|
+
stage = Bib::Status::Stage.new(content: status_val)
|
|
119
141
|
Bib::Status.new(stage: stage)
|
|
120
142
|
end
|
|
121
143
|
|
|
122
|
-
def fetch_doctype
|
|
144
|
+
def fetch_doctype # rubocop:disable Metrics/CyclomaticComplexity
|
|
123
145
|
type = case document_id
|
|
124
146
|
when /JIS\s[A-Z]\s[\w-]+:\d{4}\/AMENDMENT/ then "amendment"
|
|
125
147
|
when /JIS\s[A-Z]\s[\w-]+/ then "japanese-industrial-standard"
|
|
126
148
|
when /TR[\s\/][\w-]+/ then "technical-report"
|
|
127
149
|
when /TS[\s\/][\w-]+/ then "technical-specification"
|
|
128
150
|
end
|
|
151
|
+
@errors[:doctype] &&= type.nil?
|
|
152
|
+
return unless type
|
|
153
|
+
|
|
129
154
|
Doctype.new content: type
|
|
130
155
|
end
|
|
131
156
|
|
|
132
157
|
def fetch_ics
|
|
133
158
|
td = @doc.at("./table/tr[th[.='ICS']]/td")
|
|
159
|
+
@errors[:ics] &&= td.nil?
|
|
134
160
|
return [] unless td
|
|
135
161
|
|
|
136
162
|
td.text.strip.split.map { |code| Bib::ICS.new code: code }
|
|
@@ -141,10 +167,12 @@ module Relaton
|
|
|
141
167
|
"一般財団法人 日本規格協会", "authorizer"
|
|
142
168
|
)
|
|
143
169
|
xpath = "./table/tr[th[.='原案作成団体']]/td"
|
|
144
|
-
@doc.xpath(xpath).reduce([authorizer]) do |a, node|
|
|
170
|
+
result = @doc.xpath(xpath).reduce([authorizer]) do |a, node|
|
|
145
171
|
a << create_contrib(node.text.strip, "author")
|
|
146
172
|
a << create_contrib(node.text.strip, "publisher")
|
|
147
173
|
end
|
|
174
|
+
@errors[:contributor] &&= result.empty?
|
|
175
|
+
result
|
|
148
176
|
end
|
|
149
177
|
|
|
150
178
|
def create_contrib(name, role)
|
|
@@ -167,6 +195,7 @@ module Relaton
|
|
|
167
195
|
|
|
168
196
|
def fetch_editorialgroup_contributor # rubocop:disable Metrics/MethodLength
|
|
169
197
|
node = @doc.at("./table/tr[th[.='原案作成団体']]/td")
|
|
198
|
+
@errors[:editorialgroup] &&= node.nil?
|
|
170
199
|
return unless node
|
|
171
200
|
|
|
172
201
|
subdivision = Bib::Subdivision.new(
|
data/lib/relaton/jis/version.rb
CHANGED
data/relaton_jis.gemspec
CHANGED
|
@@ -35,8 +35,9 @@ Gem::Specification.new do |spec|
|
|
|
35
35
|
|
|
36
36
|
# Uncomment to register a new dependency of your gem
|
|
37
37
|
spec.add_dependency "mechanize", "~> 2.10"
|
|
38
|
+
spec.add_dependency "relaton-core", "~> 0.0.13"
|
|
38
39
|
spec.add_dependency "relaton-index", "~> 0.2.0"
|
|
39
|
-
spec.add_dependency "relaton-iso", "~> 2.0.0
|
|
40
|
+
spec.add_dependency "relaton-iso", "~> 2.0.0"
|
|
40
41
|
|
|
41
42
|
# For more information and examples about making a new gem, check out our
|
|
42
43
|
# guide at: https://bundler.io/guides/creating_gem.html
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-jis
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.0.pre.alpha.1
|
|
4
|
+
version: 2.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
@@ -23,6 +23,20 @@ dependencies:
|
|
|
23
23
|
- - "~>"
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
25
|
version: '2.10'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: relaton-core
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 0.0.13
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: 0.0.13
|
|
26
40
|
- !ruby/object:Gem::Dependency
|
|
27
41
|
name: relaton-index
|
|
28
42
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -43,14 +57,14 @@ dependencies:
|
|
|
43
57
|
requirements:
|
|
44
58
|
- - "~>"
|
|
45
59
|
- !ruby/object:Gem::Version
|
|
46
|
-
version: 2.0.0
|
|
60
|
+
version: 2.0.0
|
|
47
61
|
type: :runtime
|
|
48
62
|
prerelease: false
|
|
49
63
|
version_requirements: !ruby/object:Gem::Requirement
|
|
50
64
|
requirements:
|
|
51
65
|
- - "~>"
|
|
52
66
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: 2.0.0
|
|
67
|
+
version: 2.0.0
|
|
54
68
|
description: 'Relaton::Jis: retrieve IETF Standards for bibliographic use using the
|
|
55
69
|
BibliographicItem model'
|
|
56
70
|
email:
|