relaton-ieee 1.8.0 → 1.9.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,274 @@
1
+ module RelatonIeee
2
+ class DataParser
3
# Maps the value of a date element's `datetype` attribute to the
# bibliographic date type used when the date object is built.
DATETYPES = {
  "OriginalPub" => "created",
  "ePub" => "published",
  "LastInspecUpd" => "updated",
}.freeze
5
+
6
+ attr_reader :doc, :fetcher
7
+
8
+ #
9
+ # Create RelatonIeee::DataParser instance
10
+ #
11
+ # @param [Nokogiri::XML::Element] doc document
12
+ # @param [RelatonIeee::DataFetcher] fetcher
13
+ #
14
def initialize(doc, fetcher)
  # Keep the fetcher and the XML node; both are exposed via attr_reader
  # and read by the parse_* methods.
  @fetcher = fetcher
  @doc = doc
end
18
+
19
+ #
20
+ # Parse IEEE document
21
+ #
22
+ # @param [Nokogiri::XML::Element] doc document
23
+ # @param [RelatonIeee::DataFetcher] fetcher data fetcher passed on to the new parser
24
+ #
25
+ # @return [RelatonIeee::IeeeBibliographicItem]
26
+ #
27
def self.parse(doc, fetcher)
  # Convenience entry point: build a parser and run it in one call.
  parser = new(doc, fetcher)
  parser.parse
end
30
+
31
+ #
32
+ # Parse IEEE document
33
+ #
34
+ # @return [RelatonIeee::IeeeBibliographicItem]
35
+ #
36
def parse # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  # Every attribute comes from its own helper, evaluated in the order
  # listed; type, language and script are fixed values.
  IeeeBibliographicItem.new(
    type: "standard",
    docnumber: docnumber,
    title: parse_title,
    date: parse_date,
    docid: parse_docid,
    contributor: parse_contributor,
    abstract: parse_abstract,
    copyright: parse_copyright,
    language: ["en"],
    script: ["Latn"],
    status: parse_status,
    relation: parse_relation,
    link: parse_link,
    keyword: parse_keyword,
    ics: parse_ics,
  )
end
56
+
57
+ #
58
+ # Parse title
59
+ #
60
+ # @return [RelatonBib::TypedTitleStringCollection]
61
+ #
62
def parse_title
  # The raw title text is handed to RelatonBib, which builds the titles.
  title_node = doc.at("./volume/article/title")
  RelatonBib::TypedTitleString.from_string(title_node.text)
end
66
+
67
+ #
68
+ # Parse date
69
+ #
70
+ # @return [Array<RelatonBib::BibliographicDate>]
71
+ #
72
def parse_date # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
  dates = doc.xpath("./volume/article/articleinfo/date").map do |d|
    parts = [d.at("./year").text]
    m = d.at("./month")&.text
    if m
      # The month may be an abbreviation ("Jan."), a full name ("January")
      # or something else (e.g. already numeric); names are replaced by
      # their 1-based index, anything else is kept as it is.
      name = m.sub(/\./, "").strip
      month = Date::ABBR_MONTHNAMES.index(name) ||
        Date::MONTHNAMES.index(name) || m
      parts << month.to_s.rjust(2, "0")
    end
    day = d.at("./day")
    parts << day.text.rjust(2, "0") if day
    RelatonBib::BibliographicDate.new(
      type: DATETYPES[d[:datetype]], on: parts.join("-"),
    )
  end
  pad = doc.at("./publicationinfo/PubApprovalDate")
  # parse_date_string returns nil for formats it does not recognise; in
  # that case no "issued" date is added rather than one without a value.
  issued = pad && parse_date_string(pad.text)
  dates << RelatonBib::BibliographicDate.new(type: "issued", on: issued) if issued
  dates
end
92
+
93
+ #
94
+ # Convert date string with month name to numeric date
95
+ #
96
+ # @param [String] date source date
97
+ #
98
+ # @return [String] numeric date
99
+ #
100
def parse_date_string(date)
  # Text taken from XML may carry surrounding whitespace or line breaks.
  str = date.to_s.strip
  case str
  # \A and \z anchor the whole string; ^ and $ would only anchor a line,
  # letting multi-line input through unchanged.
  when /\A\d{4}\z/ then str
  when /\A\d{1,2}\s\w+\.?\s\d{4}/ then Date.parse(str).to_s
  end
end
106
+
107
+ #
108
+ # Parse identifiers
109
+ #
110
+ # @return [Array<RelatonBib::DocumentIdentifier>]
111
+ #
112
def parse_docid
  # The IEEE identifier always comes first; ISBN and DOI are added only
  # when the corresponding element exists.
  found = [["IEEE", pubid.to_s]]
  optional = {
    "ISBN" => "./publicationinfo/isbn",
    "DOI" => "./volume/article/articleinfo/articledoi",
  }
  optional.each do |type, path|
    node = doc.at(path)
    found << [type, node.text] if node
  end
  found.map do |type, id|
    RelatonBib::DocumentIdentifier.new(id: id, type: type)
  end
end
122
+
123
#
# Parsed publication id built from the document's normtitle
#
# @return [Object, nil] whatever RawbibIdParser.parse returns for the title
#
def pubid
  # `||=` would re-run the parser on every call whenever it yields nil,
  # so test for the instance variable itself to memoize nil results too.
  return @pubid if defined?(@pubid)

  nt = doc.at("./normtitle").text
  @pubid = RawbibIdParser.parse(nt)
end
129
+
130
+ #
131
+ # Parse docnumber
132
+ #
133
+ # @return [String] PubID
134
+ #
135
def docnumber
  # pubid may be nil; `||=` would recompute a nil result on every call,
  # so the instance variable itself is checked.
  return @docnumber if defined?(@docnumber)

  @docnumber = pubid&.to_id # doc.at("./publicationinfo/stdnumber").text
end
138
+
139
+ #
140
+ # Parse contributors
141
+ #
142
+ # @return [Array<RelatonBib::ContributionInfo>]
143
+ #
144
def parse_contributor # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  doc.xpath("./publicationinfo/publisher").map do |contrib|
    n = contrib.at("./publishername").text
    addr = contrib.xpath("./address").map do |a|
      # Both city and country are optional elements here: a missing one
      # yields nil instead of raising NoMethodError.
      RelatonBib::Address.new(
        street: [],
        city: a.at("./city")&.text,
        country: a.at("./country")&.text,
      )
    end
    e = create_org n, addr
    RelatonBib::ContributionInfo.new entity: e, role: [type: "publisher"]
  end
end
158
+
159
+ #
160
+ # Create organization
161
+ #
162
+ # @param [String] name organization's name
163
+ # @param [Array<Hash>] addr address
164
+ #
165
+ # @return [RelatonBib::Organization]
166
def create_org(name, addr = []) # rubocop:disable Metrics/MethodLength
  # Well-known abbreviations are expanded to a full name and URL; any
  # other name is used as given, with no abbreviation or URL.
  known = {
    "IEEE" => ["Institute of Electrical and Electronics Engineers",
               "http://www.ieee.org"],
    "ANSI" => ["American National Standards Institute",
               "https://www.ansi.org"],
  }
  full_name, url = known[name]
  abbr = name if full_name
  RelatonBib::Organization.new(
    name: full_name || name, abbreviation: abbr, url: url, contact: addr,
  )
end
182
+
183
+ #
184
+ # Parse abstract
185
+ #
186
+ # @return [Array<RelatonBib::FormattedString>]
187
+ #
188
def parse_abstract
  # One formatted string per abstract element, always tagged en/Latn.
  abstracts = doc.xpath("./volume/article/articleinfo/abstract")
  abstracts.map do |node|
    RelatonBib::FormattedString.new(
      content: node.text, language: "en", script: "Latn",
    )
  end
end
195
+
196
+ #
197
+ # Parse copyright
198
+ #
199
+ # @return [Array<RelatonBib::CopyrightAssociation>]
200
+ #
201
def parse_copyright
  doc.xpath("./publicationinfo/copyrightgroup/copyright").map do |node|
    # A holder such as "IEEE/ANSI" names several owners separated by "/".
    holders = node.at("./holder").text.split("/")
    owners = holders.map do |holder|
      RelatonBib::ContributionInfo.new(entity: create_org(holder))
    end
    RelatonBib::CopyrightAssociation.new(
      owner: owners, from: node.at("./year").text,
    )
  end
end
211
+
212
+ #
213
+ # Parse status
214
+ #
215
+ # @return [RelatonBib::DocumentStatus]
216
+ #
217
def parse_status
  # The text of the standard_status element is used as the stage.
  status_node = doc.at("./publicationinfo/standard_status")
  RelatonBib::DocumentStatus.new(stage: status_node.text)
end
221
+
222
+ #
223
+ # Parse relation
224
+ #
225
+ # @return [RelatonBib::DocRelationCollection]
226
+ #
227
def parse_relation # rubocop:disable Metrics/AbcSize
  nodes = doc.xpath("./publicationinfo/standard_relationship")
  rels = nodes.each_with_object([]) do |r, acc|
    ref = fetcher.backrefs[r.text]
    if ref
      # Target already known to the fetcher: build the relation now.
      rel = fetcher.create_relation(r[:type], ref)
      acc << rel if rel
      next
    end
    next if /Inactive Date/.match?(r)

    # Target not known yet: register a cross-reference with the fetcher,
    # which requires this document to have a number.
    fetcher.add_crossref(docnumber, r) if docnumber
  end
  RelatonBib::DocRelationCollection.new rels
end
239
+
240
+ #
241
+ # Parse link
242
+ #
243
+ # @return [Array<RelatonBib::TypedUri>]
244
+ #
245
def parse_link
  # Each amsid becomes a "src" link to the matching IEEE Xplore page.
  doc.xpath("./volume/article/articleinfo/amsid").map do |amsid|
    RelatonBib::TypedUri.new(
      content: "https://ieeexplore.ieee.org/document/#{amsid.text}",
      type: "src",
    )
  end
end
251
+
252
+ #
253
+ # Parse keyword
254
+ #
255
+ # @return [Array<String>]
256
+ #
257
def parse_keyword
  # Collect the text of every keyword term in document order.
  path = "./volume/article/articleinfo/keywordset/keyword/keywordterm"
  terms = doc.xpath(path)
  terms.map(&:text)
end
262
+
263
+ #
264
+ # Parse ICS
265
+ #
266
+ # @return [Array<RelatonBib::ICS>]
267
+ #
268
def parse_ics
  # The code comes from the codenum attribute, the label from the text.
  terms = doc.xpath("./publicationinfo/icscodes/code_term")
  terms.map do |term|
    RelatonBib::ICS.new(code: term[:codenum], text: term.text)
  end
end
273
+ end
274
+ end
@@ -15,7 +15,7 @@ module RelatonIeee
15
15
  # @param opts [Hash]
16
16
  def initialize(ref) # rubocop:disable Metrics/MethodLength
17
17
  super
18
- code = ref.sub /^IEEE\s(Std\s)?/, ""
18
+ code = ref.sub(/^IEEE\s(Std\s)?/, "")
19
19
  search = CGI.escape({ data: { searchTerm: code } }.to_json)
20
20
  url = "#{DOMAIN}/bin/standards/search?data=#{search}"
21
21
  resp = Faraday.get url
@@ -23,7 +23,7 @@ module RelatonIeee
23
23
  json = JSON.parse resp_json["message"]
24
24
  @array = json["response"]["searchResults"]["resultsMapList"]
25
25
  .reduce([]) do |s, hit|
26
- /^(?:\w+\s)?(?<id>[A-Z\d\.]+)(-(?<year>\d{4}))?/ =~ hit["record"]["recordTitle"]
26
+ /^(?:\w+\s)?(?<id>[A-Z\d.]+)(?:-(?<year>\d{4}))?/ =~ hit["record"]["recordTitle"]
27
27
  next s unless id && code =~ %r{^#{id}}
28
28
 
29
29
  s << Hit.new(hit["record"].merge(code: id, year: year.to_i), self)
@@ -5,7 +5,7 @@ module RelatonIeee
5
5
 
6
6
  # @param committee [Array<RelatonIeee::Committee>]
7
7
  def initialize(**args)
8
- @committee = args.delete :committee
8
+ @committee = args.delete(:committee) || []
9
9
  super
10
10
  end
11
11
 
@@ -13,7 +13,7 @@ module RelatonIeee
13
13
  # @return [RelatonIeee::IeeeBibliographicItem]
14
14
  def self.from_hash(hash)
15
15
  item_hash = ::RelatonIeee::HashConverter.hash_to_bib(hash)
16
- new **item_hash
16
+ new(**item_hash)
17
17
  end
18
18
 
19
19
  # @param opts [Hash]
@@ -22,7 +22,7 @@ module RelatonIeee
22
22
  # @option opts [String] :lang language
23
23
  # @return [String] XML
24
24
  def to_xml(**opts)
25
- super **opts do |bldr|
25
+ super(**opts) do |bldr|
26
26
  if opts[:bibdata] && committee.any?
27
27
  bldr.ext do |b|
28
28
  committee.each { |c| c.to_xml b }
@@ -34,7 +34,7 @@ module RelatonIeee
34
34
  # @return [Hash]
35
35
  def to_hash
36
36
  hash = super
37
- hash["committee"] = committee.map &:to_hash
37
+ hash["committee"] = committee.map &:to_hash if committee.any?
38
38
  hash
39
39
  end
40
40
 
@@ -19,7 +19,7 @@ module RelatonIeee
19
19
  warn "[relaton-ieee] (\"#{code}\") fetching..."
20
20
  result = search(code) || (return nil)
21
21
  year ||= code.match(/(?<=-)\d{4}/)&.to_s
22
- ret = bib_results_filter(result, year)
22
+ ret = bib_results_filter(result, code, year)
23
23
  if ret[:ret]
24
24
  item = ret[:ret].fetch
25
25
  warn "[relaton-ieee] (\"#{code}\") found #{item.docidentifier.first.id}"
@@ -42,9 +42,13 @@ module RelatonIeee
42
42
  # @param opts [Hash] options
43
43
  #
44
44
  # @return [Hash]
45
- def bib_results_filter(result, year)
45
+ def bib_results_filter(result, ref, year)
46
+ rp1 = ref_parts ref
46
47
  missed_years = []
47
48
  result.each do |hit|
49
+ rp2 = ref_parts hit.hit["recordTitle"]
50
+ next if rp1[:code] != rp2[:code] || rp1[:corr] != rp2[:corr]
51
+
48
52
  return { ret: hit } if !year
49
53
 
50
54
  return { ret: hit } if year.to_i == hit.hit[:year]
@@ -54,6 +58,15 @@ module RelatonIeee
54
58
  { years: missed_years.uniq }
55
59
  end
56
60
 
61
#
# Split a reference into code, year and corr captures
#
# @param [String, nil] ref reference or record title
#
# @return [MatchData, nil] named captures :code, :year, :corr
#
def ref_parts(ref)
  # Optional "IEEE " / "IEEE Std " prefix, then the code (everything up to
  # the first "-" or "/"), an optional "-YYYY" year and an optional
  # "/<word> <number>-YYYY" suffix captured as corr.
  pattern = %r{
    ^(?:IEEE\s(?:Std\s)?)?
    (?<code>[^-/]+)
    (?:-(?<year>\d{4}))?
    (?:/(?<corr>\w+\s\d+-\d{4}))?
  }x
  pattern.match(ref)
end
69
+
57
70
  # @param code [String]
58
71
  # @param year [String]
59
72
  # @param missed_years [Array<String>]
@@ -4,11 +4,12 @@ module RelatonIeee
4
4
  class Processor < Relaton::Processor
5
5
  attr_reader :idtype
6
6
 
7
- def initialize
7
+ def initialize # rubocop:disable Lint/MissingSuper
8
8
  @short = :relaton_ieee
9
9
  @prefix = "IEEE"
10
10
  @defaultprefix = %r{^IEEE\s}
11
11
  @idtype = "IEEE"
12
+ @datasets = %w[ieee-rawbib]
12
13
  end
13
14
 
14
15
  # @param code [String]
@@ -19,6 +20,18 @@ module RelatonIeee
19
20
  ::RelatonIeee::IeeeBibliography.get(code, date, opts)
20
21
  end
21
22
 
23
+ #
24
+ # Fetch all the documents from ./ieee-rawbib directory
25
+ #
26
+ # @param [String] _source source name
27
+ # @param [Hash] opts
28
+ # @option opts [String] :output directory to output documents
29
+ # @option opts [String] :format
30
+ #
31
def fetch_data(_source, opts)
  # The source name is not consulted; the options hash is splatted into
  # keyword arguments for the fetcher.
  options = opts
  DataFetcher.fetch(**options)
end
34
+
22
35
  # @param xml [String]
23
36
  # @return [RelatonIeee::IeeeBibliographicItem]
24
37
  def from_xml(xml)
@@ -0,0 +1,149 @@
1
+ module RelatonIeee
2
class PubId
  # One identifier inside a publication id: a number plus optional parts.
  class Id
    # @return [String]
    attr_reader :number

    # @return [String, nil]
    attr_reader :publisher, :stage, :part, :status, :approval, :edition,
                :draft, :rev, :corr, :amd, :redline, :year, :month

    #
    # Build an identifier from its components
    #
    # @param [String] number
    # @param [Hash] args optional components; missing ones are left nil
    # @option args [String] :publisher
    # @option args [String] :stage
    # @option args [String] :part
    # @option args [String] :status
    # @option args [String] :approval
    # @option args [String] :edition
    # @option args [String] :draft
    # @option args [String] :rev
    # @option args [String] :corr
    # @option args [String] :amd
    # @option args [Boolean] :redline
    # @option args [String] :year
    # @option args [String] :month
    #
    def initialize(number:, **args)
      @number = number
      %i[publisher stage part status approval edition draft rev corr amd
         year month redline].each do |key|
        instance_variable_set(:"@#{key}", args[key])
      end
    end

    #
    # Identifier as a string
    #
    # Layout: "publisher status approval stage number", then "-part" and
    # the edition, draft, rev, corr, amd, year, month and redline suffixes,
    # each only when the component is set.
    #
    # @return [String]
    #
    def to_s
      # Prefixes are prepended innermost first, so stage sits next to the
      # number and publisher ends up at the very front.
      out = [stage, approval, status, publisher].reduce(number) do |acc, pre|
        pre ? "#{pre} #{acc}" : acc
      end
      out += "-#{part}" if part
      tail = [edition_to_s, draft_to_s, rev_to_s, corr_to_s, amd_to_s,
              year_to_s, month_to_s, redline_to_s]
      tail.reduce(out) { |acc, piece| acc + piece }
    end

    # @return [String] "/E-<edition>" or an empty string
    def edition_to_s
      suffix("/E-", edition)
    end

    # @return [String] "/D-<draft>" or an empty string
    def draft_to_s
      suffix("/D-", draft)
    end

    # @return [String] "/R-<rev>" or an empty string
    def rev_to_s
      suffix("/R-", rev)
    end

    # @return [String] "/Cor<corr>" or an empty string
    def corr_to_s
      suffix("/Cor", corr)
    end

    # @return [String] "/Amd<amd>" or an empty string
    def amd_to_s
      suffix("/Amd", amd)
    end

    # @return [String] ".<year>" or an empty string
    def year_to_s
      suffix(".", year)
    end

    # @return [String] "-<month>" or an empty string
    def month_to_s
      suffix("-", month)
    end

    # @return [String] " Redline" when the redline flag is set, else ""
    def redline_to_s
      redline ? " Redline" : ""
    end

    private

    # Marker followed by the value, or an empty string for a nil/false value.
    def suffix(marker, value)
      value ? "#{marker}#{value}" : ""
    end
  end

  # @return [Array<RelatonIeee::PubId::Id>]
  attr_reader :pubid

  #
  # IEEE publication id made of one or more identifiers
  #
  # @param [Array<Hash>, Hash] pubid components of each identifier
  #
  def initialize(pubid)
    @pubid = array(pubid).map { |parts| Id.new(**parts) }
  end

  #
  # Wrap a single hash into an array; arrays are returned unchanged
  #
  # @param [Array<Hash>, Hash] pid
  #
  # @return [Array<Hash>]
  #
  def array(pid)
    return pid if pid.is_a?(Array)

    [pid]
  end

  #
  # All identifiers as strings, joined with "/"
  #
  # @return [String]
  #
  def to_s
    parts = pubid.map(&:to_s)
    parts.join("/")
  end

  #
  # String form of the first identifier, completed with those suffixes of
  # the second identifier that the first one does not have itself
  #
  # @return [String]
  #
  def to_id
    primary, secondary = pubid
    out = primary.to_s
    return out unless pubid.size > 1

    %i[edition draft rev corr amd year month].each do |attr|
      out += secondary.public_send("#{attr}_to_s") if primary.public_send(attr).nil?
    end
    out += secondary.redline_to_s unless primary.redline
    out
  end
end
149
+ end