relaton-jis 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +12 -0
- data/Gemfile +20 -0
- data/LICENSE.txt +21 -0
- data/README.adoc +141 -0
- data/Rakefile +12 -0
- data/grammars/basicdoc.rng +1125 -0
- data/grammars/biblio-standoc.rng +164 -0
- data/grammars/biblio.rng +1461 -0
- data/grammars/relaton-jis-compile.rng +11 -0
- data/grammars/relaton-jis.rng +231 -0
- data/lib/relaton_jis/bibliographic_item.rb +12 -0
- data/lib/relaton_jis/bibliography.rb +58 -0
- data/lib/relaton_jis/hash_converter.rb +9 -0
- data/lib/relaton_jis/hit.rb +48 -0
- data/lib/relaton_jis/hit_collection.rb +58 -0
- data/lib/relaton_jis/processor.rb +41 -0
- data/lib/relaton_jis/scraper.rb +132 -0
- data/lib/relaton_jis/version.rb +5 -0
- data/lib/relaton_jis/xml_parser.rb +9 -0
- data/lib/relaton_jis.rb +25 -0
- data/relaton_jis.gemspec +42 -0
- data/sig/relaton_jis.rbs +4 -0
- metadata +97 -0
@@ -0,0 +1,11 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
|
3
|
+
<include href="basicdoc.rng"/>
|
4
|
+
<include href="relaton-jis.rng"/>
|
5
|
+
<start>
|
6
|
+
<choice>
|
7
|
+
<ref name="bibitem"/>
|
8
|
+
<ref name="bibdata"/>
|
9
|
+
</choice>
|
10
|
+
</start>
|
11
|
+
</grammar>
|
@@ -0,0 +1,231 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
3
|
+
<include href="biblio-standoc.rng">
|
4
|
+
<define name="BibDataExtensionType">
|
5
|
+
<optional>
|
6
|
+
<attribute name="schema-version"/>
|
7
|
+
</optional>
|
8
|
+
<ref name="doctype"/>
|
9
|
+
<optional>
|
10
|
+
<ref name="docsubtype"/>
|
11
|
+
</optional>
|
12
|
+
<ref name="editorialgroup"/>
|
13
|
+
<zeroOrMore>
|
14
|
+
<ref name="ics"/>
|
15
|
+
</zeroOrMore>
|
16
|
+
<ref name="structuredidentifier"/>
|
17
|
+
<optional>
|
18
|
+
<ref name="stagename"/>
|
19
|
+
</optional>
|
20
|
+
</define>
|
21
|
+
<define name="bdate">
|
22
|
+
<element name="date">
|
23
|
+
<attribute name="type">
|
24
|
+
<choice>
|
25
|
+
<ref name="BibliographicDateType"/>
|
26
|
+
<text/>
|
27
|
+
</choice>
|
28
|
+
</attribute>
|
29
|
+
<choice>
|
30
|
+
<group>
|
31
|
+
<element name="from">
|
32
|
+
<ref name="ISO8601Date"/>
|
33
|
+
</element>
|
34
|
+
<optional>
|
35
|
+
<element name="to">
|
36
|
+
<ref name="ISO8601Date"/>
|
37
|
+
</element>
|
38
|
+
</optional>
|
39
|
+
</group>
|
40
|
+
<element name="on">
|
41
|
+
<choice>
|
42
|
+
<ref name="ISO8601Date"/>
|
43
|
+
<value>--</value>
|
44
|
+
<value>–</value>
|
45
|
+
</choice>
|
46
|
+
</element>
|
47
|
+
</choice>
|
48
|
+
</element>
|
49
|
+
</define>
|
50
|
+
<define name="organization">
|
51
|
+
<element name="organization">
|
52
|
+
<oneOrMore>
|
53
|
+
<ref name="orgname"/>
|
54
|
+
</oneOrMore>
|
55
|
+
<optional>
|
56
|
+
<ref name="abbreviation"/>
|
57
|
+
</optional>
|
58
|
+
<optional>
|
59
|
+
<ref name="uri"/>
|
60
|
+
</optional>
|
61
|
+
<zeroOrMore>
|
62
|
+
<ref name="org-identifier"/>
|
63
|
+
</zeroOrMore>
|
64
|
+
<zeroOrMore>
|
65
|
+
<ref name="contact"/>
|
66
|
+
</zeroOrMore>
|
67
|
+
<optional>
|
68
|
+
<ref name="technical-committee"/>
|
69
|
+
</optional>
|
70
|
+
<optional>
|
71
|
+
<ref name="subcommittee"/>
|
72
|
+
</optional>
|
73
|
+
<optional>
|
74
|
+
<ref name="workgroup"/>
|
75
|
+
</optional>
|
76
|
+
<optional>
|
77
|
+
<ref name="secretariat"/>
|
78
|
+
</optional>
|
79
|
+
</element>
|
80
|
+
</define>
|
81
|
+
<define name="DocumentType">
|
82
|
+
<choice>
|
83
|
+
<value>british-standard</value>
|
84
|
+
<value>draft-for-development</value>
|
85
|
+
<value>published-document</value>
|
86
|
+
<value>privately-subscribed-standard</value>
|
87
|
+
<value>publicly-available-specification</value>
|
88
|
+
<value>flex-standard</value>
|
89
|
+
<value>international-standard</value>
|
90
|
+
<value>technical-specification</value>
|
91
|
+
<value>technical-report</value>
|
92
|
+
<value>guide</value>
|
93
|
+
<value>international-workshop-agreement</value>
|
94
|
+
<value>industry-technical-agreement</value>
|
95
|
+
<value>standard</value>
|
96
|
+
<value>european-workshop-agreement</value>
|
97
|
+
<value>fast-track-standard</value>
|
98
|
+
<value>expert-commentary</value>
|
99
|
+
</choice>
|
100
|
+
</define>
|
101
|
+
<define name="DocumentSubtype">
|
102
|
+
<choice>
|
103
|
+
<value>specification</value>
|
104
|
+
<value>method-of-test</value>
|
105
|
+
<value>method-of-specifying</value>
|
106
|
+
<value>vocabulary</value>
|
107
|
+
<value>code-of-practice</value>
|
108
|
+
</choice>
|
109
|
+
</define>
|
110
|
+
<define name="structuredidentifier">
|
111
|
+
<element name="structuredidentifier">
|
112
|
+
<optional>
|
113
|
+
<attribute name="type"/>
|
114
|
+
</optional>
|
115
|
+
<group>
|
116
|
+
<ref name="documentnumber"/>
|
117
|
+
<optional>
|
118
|
+
<ref name="tc-documentnumber"/>
|
119
|
+
</optional>
|
120
|
+
</group>
|
121
|
+
</element>
|
122
|
+
</define>
|
123
|
+
<define name="editorialgroup">
|
124
|
+
<element name="editorialgroup">
|
125
|
+
<ref name="ISOProjectGroup"/>
|
126
|
+
</element>
|
127
|
+
</define>
|
128
|
+
</include>
|
129
|
+
<define name="updates_document_type">
|
130
|
+
<element name="updates-document-type">
|
131
|
+
<ref name="DocumentType"/>
|
132
|
+
</element>
|
133
|
+
</define>
|
134
|
+
<define name="ISOProjectGroup">
|
135
|
+
<zeroOrMore>
|
136
|
+
<ref name="agency"/>
|
137
|
+
</zeroOrMore>
|
138
|
+
<oneOrMore>
|
139
|
+
<ref name="technical-committee"/>
|
140
|
+
</oneOrMore>
|
141
|
+
<zeroOrMore>
|
142
|
+
<ref name="subcommittee"/>
|
143
|
+
</zeroOrMore>
|
144
|
+
<zeroOrMore>
|
145
|
+
<ref name="workgroup"/>
|
146
|
+
</zeroOrMore>
|
147
|
+
<optional>
|
148
|
+
<ref name="secretariat"/>
|
149
|
+
</optional>
|
150
|
+
</define>
|
151
|
+
<define name="approvalgroup">
|
152
|
+
<element name="approvalgroup">
|
153
|
+
<ref name="ISOProjectGroup"/>
|
154
|
+
</element>
|
155
|
+
</define>
|
156
|
+
<define name="agency">
|
157
|
+
<element name="agency">
|
158
|
+
<text/>
|
159
|
+
</element>
|
160
|
+
</define>
|
161
|
+
<define name="horizontal">
|
162
|
+
<element name="horizontal">
|
163
|
+
<data type="boolean"/>
|
164
|
+
</element>
|
165
|
+
</define>
|
166
|
+
<define name="documentnumber">
|
167
|
+
<element name="project-number">
|
168
|
+
<optional>
|
169
|
+
<attribute name="part">
|
170
|
+
<data type="int"/>
|
171
|
+
</attribute>
|
172
|
+
</optional>
|
173
|
+
<optional>
|
174
|
+
<attribute name="subpart">
|
175
|
+
<data type="int"/>
|
176
|
+
</attribute>
|
177
|
+
</optional>
|
178
|
+
<optional>
|
179
|
+
<attribute name="amendment">
|
180
|
+
<data type="int"/>
|
181
|
+
</attribute>
|
182
|
+
</optional>
|
183
|
+
<optional>
|
184
|
+
<attribute name="corrigendum">
|
185
|
+
<data type="int"/>
|
186
|
+
</attribute>
|
187
|
+
</optional>
|
188
|
+
<optional>
|
189
|
+
<attribute name="origyr">
|
190
|
+
<ref name="ISO8601Date"/>
|
191
|
+
</attribute>
|
192
|
+
</optional>
|
193
|
+
<text/>
|
194
|
+
</element>
|
195
|
+
</define>
|
196
|
+
<define name="tc-documentnumber">
|
197
|
+
<element name="tc-document-number">
|
198
|
+
<data type="int"/>
|
199
|
+
</element>
|
200
|
+
</define>
|
201
|
+
<define name="subcommittee">
|
202
|
+
<element name="subcommittee">
|
203
|
+
<ref name="IsoWorkgroup"/>
|
204
|
+
</element>
|
205
|
+
</define>
|
206
|
+
<define name="workgroup">
|
207
|
+
<element name="workgroup">
|
208
|
+
<ref name="IsoWorkgroup"/>
|
209
|
+
</element>
|
210
|
+
</define>
|
211
|
+
<define name="secretariat">
|
212
|
+
<element name="secretariat">
|
213
|
+
<text/>
|
214
|
+
</element>
|
215
|
+
</define>
|
216
|
+
<define name="stagename">
|
217
|
+
<element name="stagename">
|
218
|
+
<optional>
|
219
|
+
<attribute name="abbreviation"/>
|
220
|
+
</optional>
|
221
|
+
<text/>
|
222
|
+
</element>
|
223
|
+
</define>
|
224
|
+
<define name="coverimages">
|
225
|
+
<element name="coverimages">
|
226
|
+
<oneOrMore>
|
227
|
+
<ref name="image"/>
|
228
|
+
</oneOrMore>
|
229
|
+
</element>
|
230
|
+
</define>
|
231
|
+
</grammar>
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module RelatonJis
  # Bibliographic item class of the JIS flavor; all behavior other than
  # {#ext_schema} comes from RelatonIsoBib::IsoBibliographicItem.
  class BibliographicItem < RelatonIsoBib::IsoBibliographicItem
    #
    # Fetch the flavor schema version.
    #
    # The value is looked up in `schema_versions` (not defined in this class;
    # presumably supplied by the parent class) under the "relaton-model-jis"
    # key and cached in `@ext_schema` once it is truthy.
    #
    # @return [String] schema version
    #
    def ext_schema
      return @ext_schema if @ext_schema

      @ext_schema = schema_versions["relaton-model-jis"]
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module RelatonJis
  # Entry points for searching the JSA webdesk and fetching a JIS document.
  # All methods are callable directly on the module (`extend self`).
  module Bibliography
    extend self

    # Common URL prefix; screen identifiers are appended to it.
    SOURCE = "https://webdesk.jsa.or.jp/books/W11M".freeze

    #
    # Search JIS by keyword
    #
    # Posts the keyword, reads the `disp_screen` value from the JSON reply and
    # loads that screen's index page to collect the result blocks.
    #
    # @param [String] code JIS document code
    # @param [String, nil] year JIS document year
    #
    # @return [RelatonJis::HitCollection] search result
    #
    def search(code, year = nil)
      agent = Mechanize.new
      resp = agent.post "#{SOURCE}0010/searchByKeyword", search_type: "JIS", keyword: code
      disp = JSON.parse resp.body
      result = agent.get "#{SOURCE}#{disp['disp_screen']}/index"
      # NOTE(review): "blockGenaral" is spelled as in the original query;
      # presumably it mirrors the class name used by the site's markup.
      HitCollection.new code, year, result: result.xpath("//div[@class='blockGenaral']")
    end

    #
    # Get JIS document by reference
    #
    # Progress and the outcome are reported with `warn`.
    #
    # @param [String] ref JIS document reference
    # @param [String, nil] year JIS document year
    # @param [Hash] _opts options (unused)
    #
    # @return [RelatonJis::BibliographicItem, nil] JIS document, or nil when
    #   no hit matches
    #
    def get(ref, year = nil, _opts = {})
      warn "[relaton-jis] (\"#{ref}\") fetching..."
      result = search(ref, year).find
      if result[:bib]
        warn "[relaton-jis] (\"#{ref}\") found #{result[:bib].docidentifier[0].id}"
        return result[:bib]
      end
      hint result, ref, year
      nil
    end

    #
    # Log hint message
    #
    # Always reports that the reference was not found. The additional TIP line
    # is printed only when at least one other edition year was actually seen:
    # an empty list is truthy in Ruby, so it has to be checked explicitly.
    #
    # @param [Hash] result search result
    # @param [String] ref reference to search
    # @param [String, nil] year year to search
    #
    # @return [nil]
    #
    def hint(result, ref, year)
      warn "[relaton-jis] (\"#{ref}\") not found. The identifier must be " \
           "exactly as shown on the webdesk.jsa.or.jp website."
      missed_years = Array(result[:missed_years]).compact.uniq
      return if missed_years.empty?

      # The year may have been given inside the reference ("JIS X 0208:1997")
      # rather than as a separate argument.
      requested = year || ref.to_s[/:(\d{4})/, 1]
      warn "[relaton-jis] (\"#{ref}\") TIP: No match for edition year #{requested}, " \
           "but matches exist for #{missed_years.join(', ')}."
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module RelatonJis
  # A single search result: the identifier text and URL of one result block.
  class Hit < RelatonBib::Hit
    #
    # Create new hit
    #
    # Reads the identifier from the text of the node's `a` child and the URL
    # from that element's `href` attribute.
    #
    # @param [Nokogiri::XML::Element] node found node
    # @param [RelatonJis::HitCollection] collection hit collection
    #
    # @return [RelatonJis::Hit] new hit
    #
    def self.create(node, collection)
      a = node.at("./a")
      hit = { id: a.at("./text()").text.strip, url: a["href"] }
      new hit, collection
    end

    #
    # Check if hit matches reference
    #
    # A hit matches when its code contains the reference code, the year (if
    # given) is equal, and every explanation/amendment part present in the
    # reference is also present (and equal, for numbers and years) in the hit.
    # An identifier whose code could not be parsed never matches.
    #
    # @param [Hash] ref_parts parts of reference
    # @param [String, nil] year year
    #
    # @return [Boolean] true if hit matches reference
    #
    def match?(ref_parts, year = nil)
      code_match?(ref_parts) &&
        (year.nil? || year == id_parts[:year]) &&
        explanation_match?(ref_parts) &&
        amendment_match?(ref_parts)
    end

    #
    # Return parts of document id
    #
    # The hit identifier is parsed by the collection's `parse_ref`; the result
    # is cached.
    #
    # @return [Hash] hash with parts of document id
    #
    def id_parts
      @id_parts ||= hit_collection.parse_ref hit[:id]
    end

    #
    # Scrape the document page behind this hit (cached after the first call).
    #
    # @return [RelatonJis::BibliographicItem] scraped document
    #
    def fetch
      @fetch ||= Scraper.new(hit[:url]).fetch
    end

    private

    # True when both codes were parsed and the hit code contains the
    # reference code. Guards against nil so unparsable identifiers are
    # treated as "no match" instead of raising.
    def code_match?(ref_parts)
      hit_code = id_parts[:code]
      ref_code = ref_parts[:code]
      return false if hit_code.nil? || ref_code.nil?

      hit_code.include?(ref_code)
    end

    # True when the explanation parts requested by the reference are satisfied.
    def explanation_match?(ref_parts)
      (ref_parts[:expl].nil? || !id_parts[:expl].nil?) &&
        (ref_parts[:expl_num].nil? || ref_parts[:expl_num] == id_parts[:expl_num])
    end

    # True when the amendment parts requested by the reference are satisfied.
    def amendment_match?(ref_parts)
      (ref_parts[:amd].nil? || !id_parts[:amd].nil?) &&
        (ref_parts[:amd_num].nil? || ref_parts[:amd_num] == id_parts[:amd_num]) &&
        (ref_parts[:amd_year].nil? || ref_parts[:amd_year] == id_parts[:amd_year])
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module RelatonJis
  # Collection of search hits together with the reference being looked for.
  class HitCollection < RelatonBib::HitCollection
    #
    # Initialize hit collection
    #
    # @param [String] text reference
    # @param [String, nil] year year
    # @param [Nokogiri::XML::NodeSet] result result nodes; one hit is created
    #   per node
    #
    def initialize(text, year = nil, result:)
      super text, year
      @array = result.map { |h| Hit.create h, self }
    end

    #
    # Find hit in collection
    #
    # Returns the first hit matching the reference and year. The year comes
    # from the `year` attribute or, failing that, from the reference itself.
    # When a year is requested, the years of hits that match on everything but
    # the year are collected for the caller's hint message.
    #
    # @return [Hash] hash with bib or array of missed years
    #
    def find
      missed_years = []
      y = year || ref_parts[:year]
      @array.each do |hit|
        return { bib: hit.fetch } if hit.match? ref_parts, y

        missed_years << hit.id_parts[:year] if y && hit.match?(ref_parts)
      end
      { missed_years: missed_years }
    end

    #
    # Return parts of reference
    #
    # @return [Hash] hash with parts of reference (cached)
    #
    def ref_parts
      @ref_parts ||= parse_ref text
    end

    #
    # Parse reference
    #
    # Splits an identifier such as "JIS X 0208:1997/AMENDMENT 1:2012" into its
    # parts. Every part that is absent (or everything, when the reference does
    # not fit the pattern) is nil.
    #
    # @param [String] ref reference
    #
    # @return [Hash] hash with parts of reference
    #
    def parse_ref(ref)
      # The amendment token accepts "AMENDMENT" and "AMD"; the historical
      # spelling "AMDENDMENT" is still accepted for backward compatibility.
      %r{
        ^(?<code>\w+\s\w\s?\d+)
        (?::(?<year>\d{4}))?
        (?:/(?<expl>EXPL(?:ANATION)?)(?:\s(?<expl_num>\d+))?)?
        (?:/(?<amd>AMENDMENT|AMD(?:ENDMENT)?)(?:\s(?<amd_num>\d+)(?::(?<amd_year>\d{4}))?)?)?
      }x =~ ref
      { code: code, year: year, expl: expl, expl_num: expl_num, amd: amd,
        amd_num: amd_num, amd_year: amd_year }
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require "relaton/processor"
|
2
|
+
|
3
|
+
module RelatonJis
  # Processor subclass that forwards each operation to the matching
  # RelatonJis component.
  class Processor < Relaton::Processor
    attr_reader :idtype

    # Set the flavor's short name, prefix, default-prefix pattern and id type.
    def initialize # rubocop:disable Lint/MissingSuper
      @idtype = "JIS"
      @prefix = "JIS"
      @defaultprefix = /^JIS\s/
      @short = :relaton_jis
    end

    # Fetch a document through RelatonJis::Bibliography.
    #
    # @param code [String]
    # @param date [String, NilClass] year
    # @param opts [Hash]
    # @return [RelatonJis::BibliographicItem]
    def get(code, date, opts)
      ::RelatonJis::Bibliography.get(code, date, opts)
    end

    # Build an item from XML through RelatonJis::XMLParser.
    #
    # @param xml [String]
    # @return [RelatonJis::BibliographicItem]
    def from_xml(xml)
      ::RelatonJis::XMLParser.from_xml(xml)
    end

    # Convert the hash with RelatonJis::HashConverter and pass the result as
    # keyword arguments to a new item.
    #
    # @param hash [Hash]
    # @return [RelatonJis::BibliographicItem]
    def hash_to_bib(hash)
      ::RelatonJis::BibliographicItem.new(
        **::RelatonJis::HashConverter.hash_to_bib(hash),
      )
    end

    # Returns hash of XML grammar (cached after the first call)
    #
    # @return [String]
    def grammar_hash
      return @grammar_hash if @grammar_hash

      @grammar_hash = ::RelatonJis.grammar_hash
    end
  end
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
module RelatonJis
  # Loads one document page and turns it into a BibliographicItem.
  class Scraper
    # Attributes to collect; each one is produced by the method named
    # `fetch_<attribute>`.
    ATTRS = %i[
      fetched title link abstract docid docnumber date type language script
      docstatus doctype ics contributor editorialgroup structuredidentifier
    ].freeze

    # Page labels ("Japanese text", "English translation") mapped to the
    # language and script recorded when that edition is available.
    LANGS = { "和文" => { lang: "ja", script: "Jpan" },
              "英訳" => { lang: "en", script: "Latn" } }.freeze

    # Page labels ("date of issue", "date of confirmation") mapped to date types.
    DATETYPES = { "発行年月日" => "issued", "確認年月日" => "confirmed" }.freeze
    # Page status labels mapped to stage values.
    STATUSES = { "有効" => "valid", "廃止" => "withdrawn" }.freeze

    # @param url [String] URL of the document page
    def initialize(url)
      @url = url
      @agent = Mechanize.new
    end

    #
    # Load the page and build the bibliographic item.
    #
    # @return [RelatonJis::BibliographicItem]
    #
    def fetch
      @doc = @agent.get(@url).at "//div[@id='main']/section"
      attrs = ATTRS.each_with_object({}) do |attr, hash|
        hash[attr] = send "fetch_#{attr}"
      end
      BibliographicItem.new(**attrs)
    end

    # @return [String] today's date as a string
    def fetch_fetched
      Date.today.to_s
    end

    #
    # Titles are read from the 2nd (Japanese) and 3rd (English) text nodes of
    # the heading. A missing or blank text node is skipped, so a page without
    # an English title yields only the Japanese one.
    #
    # @return [Array<RelatonBib::TypedTitleString>]
    #
    def fetch_title
      titles = []
      { "ja" => "Jpan", "en" => "Latn" }.each_with_index do |(lang, script), i|
        node = @doc.at("./h2/text()[#{i + 2}]")
        next unless node

        content = node.text.strip
        next if content.empty?

        titles << RelatonBib::TypedTitleString.new(content: content, language: lang, script: script)
      end
      titles
    end

    #
    # The page URL itself ("src") followed by one "pdf" link per anchor in the
    # table row labelled "プレビュー" (preview); hrefs are prefixed with the
    # page's scheme and host.
    #
    # @return [Array<RelatonBib::TypedUri>]
    #
    def fetch_link
      src = RelatonBib::TypedUri.new content: @url, type: "src"
      uri = URI @url
      domain = "#{uri.scheme}://#{uri.host}"
      @doc.xpath("./table/tr[th[.='プレビュー']]/td/a").reduce([src]) do |mem, node|
        href = "#{domain}#{node[:href]}"
        mem << RelatonBib::TypedUri.new(content: href, type: "pdf")
      end
    end

    # Abstracts from the table row labelled "規格概要" (standard overview).
    #
    # @return [Array<RelatonBib::FormattedString>]
    def fetch_abstract
      @doc.xpath("./table/tr[th[.='規格概要']]/td").map do |node|
        RelatonBib::FormattedString.new content: node.text.strip, language: "ja", script: "Jpan"
      end
    end

    # Primary identifier taken from the first text node of the heading.
    #
    # @return [Array<RelatonBib::DocumentIdentifier>]
    def fetch_docid
      id = @doc.at("./h2/text()[1]").text.strip
      [RelatonBib::DocumentIdentifier.new(id: id, type: "JIS", primary: true)]
    end

    # Division letter and number of the identifier joined together
    # (e.g. "X0208" for an identifier starting with "JIS X 0208").
    #
    # @return [String]
    def fetch_docnumber
      match = @doc.at("./h2/text()[1]").text.strip.match(/^\w+\s(\w)\s?(\d+)/)
      "#{match[1]}#{match[2]}"
    end

    # One date per DATETYPES label found on the page; labels that are not
    # present are skipped.
    #
    # @return [Array<RelatonBib::BibliographicDate>]
    def fetch_date
      DATETYPES.each_with_object([]) do |(key, type), a|
        node = @doc.at("./div/div/div/p/text()[contains(.,'#{key}')]")
        next unless node

        on = node.text.match(/\d{4}-\d{2}-\d{2}/).to_s
        a << RelatonBib::BibliographicDate.new(type: type, on: on)
      end
    end

    # @return [String] always "standard"
    def fetch_type
      "standard"
    end

    # @return [Array<String>] language codes of the available editions
    def fetch_language
      langs_scripts.map { |l| l[:lang] }
    end

    # @return [Array<String>] script codes of the available editions
    def fetch_script
      langs_scripts.map { |l| l[:script] }
    end

    # LANGS entries whose label is present on the page with a value other
    # than "-". Cached after the first call.
    #
    # @return [Array<Hash>]
    def langs_scripts
      @langs_scripts ||= LANGS.each_with_object([]) do |(key, lang), a|
        l = @doc.at("./div/div/div[@class='blockContentFile']/div/div/p[1]/span[contains(.,'#{key}')]/following-sibling::span")
        next if l.nil? || l.text.strip == "-"

        a << lang
      end
    end

    # Status read from the span following the "状態" (status) label.
    # NOTE(review): a label missing from STATUSES yields `stage: nil` —
    # confirm that DocumentStatus accepts that.
    #
    # @return [RelatonBib::DocumentStatus, nil] nil when the label is absent
    def fetch_docstatus
      st = @doc.at("./div/div/div/p/text()[contains(.,'状態')]/following-sibling::span")
      return unless st

      RelatonBib::DocumentStatus.new(stage: STATUSES[st.text.strip])
    end

    # @return [String] always "standard"
    def fetch_doctype
      "standard"
    end

    # ICS entries from the table row labelled "ICS".
    #
    # @return [Array<RelatonIsoBib::Ics>]
    def fetch_ics
      @doc.xpath("./table/tr[th[.='ICS']]/td").map do |node|
        RelatonIsoBib::Ics.new node.text.strip
      end
    end

    # Authoring organizations from the table row labelled "原案作成団体"
    # (drafting body).
    #
    # @return [Array<RelatonBib::ContributionInfo>]
    def fetch_contributor
      @doc.xpath("./table/tr[th[.='原案作成団体']]/td").map do |node|
        name = RelatonBib::LocalizedString.new node.text.strip, "ja", "Jpan"
        org = RelatonBib::Organization.new name: name
        RelatonBib::ContributionInfo.new entity: org, role: [type: "author"]
      end
    end

    # Editorial group whose technical committee is named after the drafting
    # body ("原案作成団体") row.
    #
    # @return [RelatonIsoBib::EditorialGroup, nil] nil when the row is absent
    def fetch_editorialgroup
      node = @doc.at("./table/tr[th[.='原案作成団体']]/td")
      return unless node

      tc = RelatonBib::WorkGroup.new name: node.text.strip
      RelatonIsoBib::EditorialGroup.new technical_committee: [tc]
    end

    # @return [RelatonIsoBib::StructuredIdentifier]
    def fetch_structuredidentifier
      RelatonIsoBib::StructuredIdentifier.new project_number: fetch_docnumber, type: "JIS"
    end
  end
end
|
data/lib/relaton_jis.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "mechanize"
|
4
|
+
require "relaton_iso_bib"
|
5
|
+
require_relative "relaton_jis/version"
|
6
|
+
require_relative "relaton_jis/bibliographic_item"
|
7
|
+
require_relative "relaton_jis/xml_parser"
|
8
|
+
require_relative "relaton_jis/hash_converter"
|
9
|
+
require_relative "relaton_jis/scraper"
|
10
|
+
require_relative "relaton_jis/bibliography"
|
11
|
+
require_relative "relaton_jis/hit_collection"
|
12
|
+
require_relative "relaton_jis/hit"
|
13
|
+
|
14
|
+
module RelatonJis
  class Error < StandardError; end

  # Returns hash of XML grammar
  #
  # Every entry matching "grammars/*" in the directory one level above this
  # file's directory is read in sorted path order; the concatenated contents
  # are digested with MD5.
  #
  # @return [String] hexadecimal digest
  def self.grammar_hash
    pattern = File.join(File.expand_path("..", __dir__), "grammars", "*")
    contents = Dir.glob(pattern).sort.map { |path| File.read(path) }
    Digest::MD5.hexdigest(contents.join)
  end
end
|
data/relaton_jis.gemspec
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/relaton_jis/version"
|
4
|
+
|
5
|
+
# Gem specification for relaton-jis.
Gem::Specification.new do |spec|
  spec.name = "relaton-jis"
  spec.version = RelatonJis::VERSION
  spec.authors = ["Ribose Inc."]
  spec.email = ["open.source@ribose.com"]

  # This gem retrieves JIS documents (see RelatonJis::Bibliography), so the
  # summary and description name JIS rather than IETF.
  spec.summary = "RelatonJis: retrieve JIS Standards for bibliographic " \
                 "use using the BibliographicItem model"
  spec.description = "RelatonJis: retrieve JIS Standards for bibliographic " \
                     "use using the BibliographicItem model"
  spec.homepage = "https://github.com/relaton/relaton-jis"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 2.7.0"

  # spec.metadata["allowed_push_host"] = "TODO: Set to your gem server 'https://example.com'"

  spec.metadata["homepage_uri"] = spec.homepage
  # spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
  # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  # The gemspec itself, test/CI directories and git/CI dotfiles are excluded.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency "mechanize", "~> 2.8.0"
  spec.add_dependency "relaton-iso-bib", "~> 1.14.0"

  # For more information and examples about making a new gem, check out our
  # guide at: https://bundler.io/guides/creating_gem.html
end
|