relaton-ieee 1.8.0 → 1.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,274 @@
1
+ module RelatonIeee
2
+ class DataParser
3
+ DATETYPES = { "OriginalPub" => "created", "ePub" => "published",
4
+ "LastInspecUpd" => "updated" }.freeze
5
+
6
+ attr_reader :doc, :fetcher
7
+
8
+ #
9
+ # Create RelatonIeee::DataParser instance
10
+ #
11
+ # @param [Nokogiri::XML::Element] doc document
12
+ # @param [RelatonIeee::DataFetcher] fetcher
13
+ #
14
+ def initialize(doc, fetcher)
15
+ @doc = doc
16
+ @fetcher = fetcher
17
+ end
18
+
19
+ #
20
+ # Parse IEEE document
21
+ #
22
+ # @param [Nokogiri::XML::Element] doc document
23
+ # @param [RelatonIeee::DataFetcher] fetcher fetcher providing back-references and collecting cross-references
24
+ #
25
+ # @return [RelatonIeee::IeeeBibliographicItem]
26
+ #
27
+ def self.parse(doc, fetcher)
28
+ new(doc, fetcher).parse
29
+ end
30
+
31
+ #
32
+ # Parse IEEE document
33
+ #
34
+ # @return [RelatonIeee::IeeeBibliographicItem]
35
+ #
36
+ def parse # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
37
+ args = {
38
+ type: "standard",
39
+ docnumber: docnumber,
40
+ title: parse_title,
41
+ date: parse_date,
42
+ docid: parse_docid,
43
+ contributor: parse_contributor,
44
+ abstract: parse_abstract,
45
+ copyright: parse_copyright,
46
+ language: ["en"],
47
+ script: ["Latn"],
48
+ status: parse_status,
49
+ relation: parse_relation,
50
+ link: parse_link,
51
+ keyword: parse_keyword,
52
+ ics: parse_ics,
53
+ }
54
+ IeeeBibliographicItem.new(**args)
55
+ end
56
+
57
+ #
58
+ # Parse title
59
+ #
60
+ # @return [RelatonBib::TypedTitleStringCollection]
61
+ #
62
+ def parse_title
63
+ t = doc.at("./volume/article/title").text
64
+ RelatonBib::TypedTitleString.from_string t
65
+ end
66
+
67
+ #
68
+ # Parse date
69
+ #
70
+ # @return [Array<RelatonBib::BibliographicDate>]
71
+ #
72
+ def parse_date # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
73
+ dates = doc.xpath("./volume/article/articleinfo/date").map do |d|
74
+ da = [d.at("./year").text]
75
+ m = d.at("./month")&.text
76
+ if m
77
+ month = Date::ABBR_MONTHNAMES.index(m.sub(/\./, "")) || m
78
+ da << month.to_s.rjust(2, "0")
79
+ end
80
+ day = d.at("./day")
81
+ da << day.text.rjust(2, "0") if day
82
+ on = da.compact.join "-"
83
+ RelatonBib::BibliographicDate.new type: DATETYPES[d[:datetype]], on: on
84
+ end
85
+ pad = doc.at("./publicationinfo/PubApprovalDate")
86
+ if pad
87
+ issued = parse_date_string pad.text
88
+ dates << RelatonBib::BibliographicDate.new(type: "issued", on: issued)
89
+ end
90
+ dates
91
+ end
92
+
93
+ #
94
+ # Convert date string with month name to numeric date
95
+ #
96
+ # @param [String] date source date
97
+ #
98
+ # @return [String] numeric date
99
+ #
100
+ def parse_date_string(date)
101
+ case date
102
+ when /^\d{4}$/ then date
103
+ when /^\d{1,2}\s\w+\.?\s\d{4}/ then Date.parse(date).to_s
104
+ end
105
+ end
106
+
107
+ #
108
+ # Parse identifiers
109
+ #
110
+ # @return [Array<RelatonBib::DocumentIdentifier>]
111
+ #
112
+ def parse_docid
113
+ ids = [{ id: pubid.to_s, type: "IEEE" }]
114
+ isbn = doc.at("./publicationinfo/isbn")
115
+ ids << { id: isbn.text, type: "ISBN" } if isbn
116
+ doi = doc.at("./volume/article/articleinfo/articledoi")
117
+ ids << { id: doi.text, type: "DOI" } if doi
118
+ ids.map do |dcid|
119
+ RelatonBib::DocumentIdentifier.new(**dcid)
120
+ end
121
+ end
122
+
123
+ def pubid
124
+ @pubid ||= begin
125
+ nt = doc.at("./normtitle").text
126
+ RawbibIdParser.parse(nt)
127
+ end
128
+ end
129
+
130
+ #
131
+ # Parse docnumber
132
+ #
133
+ # @return [String] PubID
134
+ #
135
+ def docnumber
136
+ @docnumber ||= pubid&.to_id # doc.at("./publicationinfo/stdnumber").text
137
+ end
138
+
139
+ #
140
+ # Parse contributors
141
+ #
142
+ # @return [Array<RelatonBib::ContributionInfo>]
143
+ #
144
+ def parse_contributor # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
145
+ doc.xpath("./publicationinfo/publisher").map do |contrib|
146
+ n = contrib.at("./publishername").text
147
+ addr = contrib.xpath("./address").map do |a|
148
+ RelatonBib::Address.new(
149
+ street: [],
150
+ city: a.at("./city")&.text,
151
+ country: a.at("./country").text,
152
+ )
153
+ end
154
+ e = create_org n, addr
155
+ RelatonBib::ContributionInfo.new entity: e, role: [type: "publisher"]
156
+ end
157
+ end
158
+
159
+ #
160
+ # Create organization
161
+ #
162
+ # @param [String] name organization's name
163
+ # @param [Array<RelatonBib::Address>] addr addresses
164
+ #
165
+ # @return [RelatonBib::Organization]
166
+ def create_org(name, addr = []) # rubocop:disable Metrics/MethodLength
167
+ case name
168
+ when "IEEE"
169
+ abbr = name
170
+ n = "Institute of Electrical and Electronics Engineers"
171
+ url = "http://www.ieee.org"
172
+ when "ANSI"
173
+ abbr = name
174
+ n = "American National Standards Institute"
175
+ url = "https://www.ansi.org"
176
+ else n = name
177
+ end
178
+ RelatonBib::Organization.new(
179
+ name: n, abbreviation: abbr, url: url, contact: addr,
180
+ )
181
+ end
182
+
183
+ #
184
+ # Parse abstract
185
+ #
186
+ # @return [Array<RelatonBib::FormattedString>]
187
+ #
188
+ def parse_abstract
189
+ doc.xpath("./volume/article/articleinfo/abstract").map do |a|
190
+ RelatonBib::FormattedString.new(
191
+ content: a.text, language: "en", script: "Latn",
192
+ )
193
+ end
194
+ end
195
+
196
+ #
197
+ # Parse copyright
198
+ #
199
+ # @return [Array<RelatonBib::CopyrightAssociation>]
200
+ #
201
+ def parse_copyright
202
+ doc.xpath("./publicationinfo/copyrightgroup/copyright").map do |c|
203
+ owner = c.at("./holder").text.split("/").map do |own|
204
+ RelatonBib::ContributionInfo.new entity: create_org(own)
205
+ end
206
+ RelatonBib::CopyrightAssociation.new(
207
+ owner: owner, from: c.at("./year").text,
208
+ )
209
+ end
210
+ end
211
+
212
+ #
213
+ # Parse status
214
+ #
215
+ # @return [RelatonBib::DocumentStatus]
216
+ #
217
+ def parse_status
218
+ stage = doc.at("./publicationinfo/standard_status").text
219
+ RelatonBib::DocumentStatus.new stage: stage
220
+ end
221
+
222
+ #
223
+ # Parse relation
224
+ #
225
+ # @return [RelatonBib::DocRelationCollection]
226
+ #
227
+ def parse_relation # rubocop:disable Metrics/AbcSize
228
+ rels = []
229
+ doc.xpath("./publicationinfo/standard_relationship").each do |r|
230
+ if (ref = fetcher.backrefs[r.text])
231
+ rel = fetcher.create_relation(r[:type], ref)
232
+ rels << rel if rel
233
+ elsif !/Inactive Date/.match?(r) && docnumber
234
+ fetcher.add_crossref(docnumber, r)
235
+ end
236
+ end
237
+ RelatonBib::DocRelationCollection.new rels
238
+ end
239
+
240
+ #
241
+ # Parse link
242
+ #
243
+ # @return [Array<RelatonBib::TypedUri>]
244
+ #
245
+ def parse_link
246
+ doc.xpath("./volume/article/articleinfo/amsid").map do |id|
247
+ l = "https://ieeexplore.ieee.org/document/#{id.text}"
248
+ RelatonBib::TypedUri.new content: l, type: "src"
249
+ end
250
+ end
251
+
252
+ #
253
+ # Parse keyword
254
+ #
255
+ # @return [Array<String>]
256
+ #
257
+ def parse_keyword
258
+ doc.xpath(
259
+ "./volume/article/articleinfo/keywordset/keyword/keywordterm",
260
+ ).map &:text
261
+ end
262
+
263
+ #
264
+ # Parse ICS
265
+ #
266
+ # @return [Array<RelatonBib::ICS>]
267
+ #
268
+ def parse_ics
269
+ doc.xpath("./publicationinfo/icscodes/code_term").map do |ics|
270
+ RelatonBib::ICS.new code: ics[:codenum], text: ics.text
271
+ end
272
+ end
273
+ end
274
+ end
@@ -15,7 +15,7 @@ module RelatonIeee
15
15
  # @param opts [Hash]
16
16
  def initialize(ref) # rubocop:disable Metrics/MethodLength
17
17
  super
18
- code = ref.sub /^IEEE\s(Std\s)?/, ""
18
+ code = ref.sub(/^IEEE\s(Std\s)?/, "")
19
19
  search = CGI.escape({ data: { searchTerm: code } }.to_json)
20
20
  url = "#{DOMAIN}/bin/standards/search?data=#{search}"
21
21
  resp = Faraday.get url
@@ -23,7 +23,7 @@ module RelatonIeee
23
23
  json = JSON.parse resp_json["message"]
24
24
  @array = json["response"]["searchResults"]["resultsMapList"]
25
25
  .reduce([]) do |s, hit|
26
- /^(?:\w+\s)?(?<id>[A-Z\d\.]+)(-(?<year>\d{4}))?/ =~ hit["record"]["recordTitle"]
26
+ /^(?:\w+\s)?(?<id>[A-Z\d.]+)(?:-(?<year>\d{4}))?/ =~ hit["record"]["recordTitle"]
27
27
  next s unless id && code =~ %r{^#{id}}
28
28
 
29
29
  s << Hit.new(hit["record"].merge(code: id, year: year.to_i), self)
@@ -5,7 +5,7 @@ module RelatonIeee
5
5
 
6
6
  # @param committee [Array<RelatonIeee::Committee>]
7
7
  def initialize(**args)
8
- @committee = args.delete :committee
8
+ @committee = args.delete(:committee) || []
9
9
  super
10
10
  end
11
11
 
@@ -13,7 +13,7 @@ module RelatonIeee
13
13
  # @return [RelatonIeee::IeeeBibliographicItem]
14
14
  def self.from_hash(hash)
15
15
  item_hash = ::RelatonIeee::HashConverter.hash_to_bib(hash)
16
- new **item_hash
16
+ new(**item_hash)
17
17
  end
18
18
 
19
19
  # @param opts [Hash]
@@ -22,7 +22,7 @@ module RelatonIeee
22
22
  # @option opts [String] :lang language
23
23
  # @return [String] XML
24
24
  def to_xml(**opts)
25
- super **opts do |bldr|
25
+ super(**opts) do |bldr|
26
26
  if opts[:bibdata] && committee.any?
27
27
  bldr.ext do |b|
28
28
  committee.each { |c| c.to_xml b }
@@ -34,7 +34,7 @@ module RelatonIeee
34
34
  # @return [Hash]
35
35
  def to_hash
36
36
  hash = super
37
- hash["committee"] = committee.map &:to_hash
37
+ hash["committee"] = committee.map &:to_hash if committee.any?
38
38
  hash
39
39
  end
40
40
 
@@ -19,7 +19,7 @@ module RelatonIeee
19
19
  warn "[relaton-ieee] (\"#{code}\") fetching..."
20
20
  result = search(code) || (return nil)
21
21
  year ||= code.match(/(?<=-)\d{4}/)&.to_s
22
- ret = bib_results_filter(result, year)
22
+ ret = bib_results_filter(result, code, year)
23
23
  if ret[:ret]
24
24
  item = ret[:ret].fetch
25
25
  warn "[relaton-ieee] (\"#{code}\") found #{item.docidentifier.first.id}"
@@ -42,9 +42,13 @@ module RelatonIeee
42
42
  # @param opts [Hash] options
43
43
  #
44
44
  # @return [Hash]
45
- def bib_results_filter(result, year)
45
+ def bib_results_filter(result, ref, year)
46
+ rp1 = ref_parts ref
46
47
  missed_years = []
47
48
  result.each do |hit|
49
+ rp2 = ref_parts hit.hit["recordTitle"]
50
+ next if rp1[:code] != rp2[:code] || rp1[:corr] != rp2[:corr]
51
+
48
52
  return { ret: hit } if !year
49
53
 
50
54
  return { ret: hit } if year.to_i == hit.hit[:year]
@@ -54,6 +58,15 @@ module RelatonIeee
54
58
  { years: missed_years.uniq }
55
59
  end
56
60
 
61
+ def ref_parts(ref)
62
+ %r{
63
+ ^(?:IEEE\s(?:Std\s)?)?
64
+ (?<code>[^-/]+)
65
+ (?:-(?<year>\d{4}))?
66
+ (?:/(?<corr>\w+\s\d+-\d{4}))?
67
+ }x.match ref
68
+ end
69
+
57
70
  # @param code [String]
58
71
  # @param year [String]
59
72
  # @param missed_years [Array<String>]
@@ -4,11 +4,12 @@ module RelatonIeee
4
4
  class Processor < Relaton::Processor
5
5
  attr_reader :idtype
6
6
 
7
- def initialize
7
+ def initialize # rubocop:disable Lint/MissingSuper
8
8
  @short = :relaton_ieee
9
9
  @prefix = "IEEE"
10
10
  @defaultprefix = %r{^IEEE\s}
11
11
  @idtype = "IEEE"
12
+ @datasets = %w[ieee-rawbib]
12
13
  end
13
14
 
14
15
  # @param code [String]
@@ -19,6 +20,18 @@ module RelatonIeee
19
20
  ::RelatonIeee::IeeeBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from ./ieee-rawbib directory
25
+ #
26
+ # @param [String] _source source name
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format
30
+ #
31
+ def fetch_data(_source, opts)
32
+ DataFetcher.fetch(**opts)
33
+ end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonIeee::IeeeBibliographicItem]
24
37
  def from_xml(xml)
@@ -0,0 +1,149 @@
1
+ module RelatonIeee
2
+ class PubId
3
+ class Id
4
+ # @return [String]
5
+ attr_reader :number
6
+
7
+ # @return [String, nil]
8
+ attr_reader :publisher, :stage, :part, :status, :approval, :edition,
9
+ :draft, :rev, :corr, :amd, :redline, :year, :month
10
+
11
+ #
12
+ # PubId constructor
13
+ #
14
+ # @param [String] number
15
+ # @param [<Hash>] **args
16
+ # @option args [String] :number
17
+ # @option args [String] :publisher
18
+ # @option args [String] :stage
19
+ # @option args [String] :part
20
+ # @option args [String] :status
21
+ # @option args [String] :approval
22
+ # @option args [String] :edition
23
+ # @option args [String] :draft
24
+ # @option args [String] :rev
25
+ # @option args [String] :corr
26
+ # @option args [String] :amd
27
+ # @option args [Boolean] :redline
28
+ # @option args [String] :year
29
+ # @option args [String] :month
30
+ #
31
+ def initialize(number:, **args) # rubocop:disable Metrics/MethodLength
32
+ @publisher = args[:publisher]
33
+ @stage = args[:stage]
34
+ @number = number
35
+ @part = args[:part]
36
+ @status = args[:status]
37
+ @approval = args[:approval]
38
+ @edition = args[:edition]
39
+ @draft = args[:draft]
40
+ @rev = args[:rev]
41
+ @corr = args[:corr]
42
+ @amd = args[:amd]
43
+ @year = args[:year]
44
+ @month = args[:month]
45
+ @redline = args[:redline]
46
+ end
47
+
48
+ #
49
+ # PubId string representation
50
+ #
51
+ # @return [String]
52
+ #
53
+ def to_s # rubocop:disable Metrics/AbcSize
54
+ out = number
55
+ out = "#{stage} #{out}" if stage
56
+ out = "#{approval} #{out}" if approval
57
+ out = "#{status} #{out}" if status
58
+ out = "#{publisher} #{out}" if publisher
59
+ out += "-#{part}" if part
60
+ out += edition_to_s + draft_to_s + rev_to_s + corr_to_s + amd_to_s
61
+ out + year_to_s + month_to_s + redline_to_s
62
+ end
63
+
64
+ def edition_to_s
65
+ edition ? "/E-#{edition}" : ""
66
+ end
67
+
68
+ def draft_to_s
69
+ draft ? "/D-#{draft}" : ""
70
+ end
71
+
72
+ def rev_to_s
73
+ rev ? "/R-#{rev}" : ""
74
+ end
75
+
76
+ def corr_to_s
77
+ corr ? "/Cor#{corr}" : ""
78
+ end
79
+
80
+ def amd_to_s
81
+ amd ? "/Amd#{amd}" : ""
82
+ end
83
+
84
+ def year_to_s
85
+ year ? ".#{year}" : ""
86
+ end
87
+
88
+ def month_to_s
89
+ month ? "-#{month}" : ""
90
+ end
91
+
92
+ def redline_to_s
93
+ redline ? " Redline" : ""
94
+ end
95
+ end
96
+
97
+ # @return [Array<RelatonIeee::PubId::Id>]
98
+ attr_reader :pubid
99
+
100
+ #
101
+ # IEEE publication id
102
+ #
103
+ # @param [Array<Hash>, Hash] pubid
104
+ #
105
+ def initialize(pubid)
106
+ @pubid = array(pubid).map { |id| Id.new(**id) }
107
+ end
108
+
109
+ #
110
+ # Convert to array
111
+ #
112
+ # @param [Array<Hash>, Hash] pid
113
+ #
114
+ # @return [Array<Hash>]
115
+ #
116
+ def array(pid)
117
+ pid.is_a?(Array) ? pid : [pid]
118
+ end
119
+
120
+ #
121
+ # PubId string representation
122
+ #
123
+ # @return [String]
124
+ #
125
+ def to_s
126
+ pubid.map(&:to_s).join("/")
127
+ end
128
+
129
+ #
130
+ # Generate ID from the first identifier plus the second identifier's suffix parts that the first lacks (second publisher and number are omitted)
131
+ #
132
+ # @return [String]
133
+ #
134
+ def to_id # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
135
+ out = pubid[0].to_s
136
+ if pubid.size > 1
137
+ out += pubid[1].edition_to_s if pubid[0].edition.nil?
138
+ out += pubid[1].draft_to_s if pubid[0].draft.nil?
139
+ out += pubid[1].rev_to_s if pubid[0].rev.nil?
140
+ out += pubid[1].corr_to_s if pubid[0].corr.nil?
141
+ out += pubid[1].amd_to_s if pubid[0].amd.nil?
142
+ out += pubid[1].year_to_s if pubid[0].year.nil?
143
+ out += pubid[1].month_to_s if pubid[0].month.nil?
144
+ out += pubid[1].redline_to_s unless pubid[0].redline
145
+ end
146
+ out
147
+ end
148
+ end
149
+ end