relaton-iso 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 61646e164a8a384cb744e3e50dafdea336f74cbf7418b59d1b5a30e8aabeea62
4
- data.tar.gz: f3c861c4fd11e792f9bd7d7edd0e83d95a5972c6e1590825ce0562360cc5f0d6
3
+ metadata.gz: c03256162a043a8d8842d3cd31d95dad25487cdde4992b34d44951c650421c3d
4
+ data.tar.gz: '0295b78c2fbfaaa848e07bb46fabfed6a4ea44c0af0f5e865bf1b25333a4e376'
5
5
  SHA512:
6
- metadata.gz: 073cf929ab4f17e00651c03443b4bc0d21526994ebd5532a5914f841066f4ec6522d58344ddabf09e69f409243c59c6c721db2a3c1c38c9784b6211cd48d9982
7
- data.tar.gz: 2e710926a48e59aba377f89cea8589f4dbb0abe0ddd53a906d9463faf7adf70681f8ef8d8b68f30d8398c9db4b9859a898b243ec53f5f2010dd7bd5a8e99b086
6
+ metadata.gz: f34e505b97d03f6e8ed481ee87036e51d8748e81e9ab77ea78fda8c7764eca21942a033a06c5e363b0ffacee648e3125cf93a1254897ab60512877836177ce3c
7
+ data.tar.gz: 9266cb080c0e888b9f6ffaa151d5bc118a6f3b73655cf3f9c8f9d1727dd98e779eef2047f977d492f54c668b58d5f23663374bf2adb8fe306b97f06558b3e973
@@ -16,19 +16,9 @@ jobs:
16
16
  strategy:
17
17
  fail-fast: false
18
18
  matrix:
19
- ruby: [ '2.7', '2.6', '2.5', '2.4' ]
19
+ ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
20
  os: [ ubuntu-latest, windows-latest, macos-latest ]
21
21
  experimental: [ false ]
22
- include:
23
- - ruby: '3.0'
24
- os: 'ubuntu-latest'
25
- experimental: true
26
- - ruby: '3.0'
27
- os: 'windows-latest'
28
- experimental: true
29
- - ruby: '3.0'
30
- os: 'macos-latest'
31
- experimental: true
32
22
  steps:
33
23
  - uses: actions/checkout@v2
34
24
  with:
data/.rubocop.yml CHANGED
@@ -5,6 +5,6 @@
5
5
  inherit_from:
6
6
  - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
7
7
  AllCops:
8
- TargetRubyVersion: 2.4
8
+ TargetRubyVersion: 2.5
9
9
  Rails:
10
10
  Enabled: false
data/README.adoc CHANGED
@@ -31,7 +31,7 @@ Or install it yourself as:
31
31
 
32
32
  == Usage
33
33
 
34
- === Search for a standard using keywords
34
+ === Search for standards using keywords
35
35
 
36
36
  [source,ruby]
37
37
  ----
@@ -50,6 +50,7 @@ item = hit_collection[2].fetch
50
50
  [#<RelatonBib::FormattedString:0x007fa5dca88458
51
51
  @content=
52
52
  "ISO/TS 19115-3:2016 defines an integrated XML implementation of ISO 19115‑1, ..."
53
+ ...
53
54
 
54
55
  item.docidentifier
55
56
  => [#<RelatonBib::DocumentIdentifier:0x007fd9ce9c6878 @id="ISO/TS 19115-3:2016", @scope=nil, @type="ISO">,
@@ -59,6 +60,88 @@ item.docidentifier
59
60
  => "urn:iso:std:iso-ts:ts:19115:-3:stage-90.92:ed-1:en,fr"
60
61
  ----
61
62
 
63
+ === Fetch document by reference and year
64
+
65
+ [source,ruby]
66
+ ----
67
+ item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
68
+ [relaton-iso] ("ISO 19115:2003") fetching...
69
+ [relaton-iso] ("ISO 19115:2003") found ISO 19115:2003
70
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83429e30
71
+ ...
72
+
73
+ item = RelatonIso::IsoBibliography.get "ISO 19115", "2003"
74
+ [relaton-iso] ("ISO 19115") fetching...
75
+ [relaton-iso] ("ISO 19115") found ISO 19115:2003
76
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c828d3180
77
+ ...
78
+
79
+ item.docidentifier[0].id
80
+ => "ISO 19115:2003"
81
+ ----
82
+
83
+ === Fetch non-part document
84
+
85
+ [source,ruby]
86
+ ----
87
+ item = RelatonIso::IsoBibliography.get "ISO 19115"
88
+ [relaton-iso] ("ISO 19115") fetching...
89
+ [relaton-iso] ("ISO 19115") found ISO 19115:2003
90
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830275a8
91
+ ...
92
+
93
+ item.docidentifier[0].id
94
+ => "ISO 19115:2003"
95
+ ----
96
+
97
+ === Fetch part document
98
+
99
+ [source,ruby]
100
+ ----
101
+ item = RelatonIso::IsoBibliography.get "ISO 19115-1"
102
+ [relaton-iso] ("ISO 19115-1") fetching...
103
+ [relaton-iso] ("ISO 19115-1") found ISO 19115-1:2014
104
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83408af0
105
+ ...
106
+
107
+ item.docidentifier[0].id
108
+ => "ISO 19115-1:2014"
109
+ ----
110
+
111
+ === Fetch all-parts document
112
+
113
+ [source,ruby]
114
+ ----
115
+ item = RelatonIso::IsoBibliography.get "ISO 19115 (all parts)"
116
+ [relaton-iso] ("ISO 19115") fetching...
117
+ [relaton-iso] ("ISO 19115") found ISO 19115 (all parts)
118
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8ca216e118
119
+ ...
120
+
121
+ item = RelatonIso::IsoBibliography.get "ISO 19115", nil, all_parts: true
122
+ [relaton-iso] ("ISO 19115") fetching...
123
+ [relaton-iso] ("ISO 19115") found ISO 19115 (all parts)
124
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830f3d38
125
+ ...
126
+
127
+ item.docidentifier[0].id
128
+ => "ISO 19115 (all parts)"
129
+
130
+ item = RelatonIso::IsoBibliography.get "ISO 19115-1 (all parts)"
131
+ [relaton-iso] ("ISO 19115-1") fetching...
132
+ [relaton-iso] ("ISO 19115-1") found ISO 19115 (all parts)
133
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c8290e5a0
134
+
135
+ item = RelatonIso::IsoBibliography.get "ISO 19115-1", nil, all_parts: true
136
+ [relaton-iso] ("ISO 19115-1") fetching...
137
+ [relaton-iso] ("ISO 19115-1") found ISO 19115 (all parts)
138
+ => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c925355b8
139
+ ...
140
+
141
+ item.docidentifier[0].id
142
+ => "ISO 19115 (all parts)"
143
+ ----
144
+
62
145
  === Search for ISO/IEC Directives
63
146
 
64
147
  The ISO/IEC Directives are stored in a static cache in the relaton gem, so the relaton gem must be used to fetch them. The following references can be fetched:
data/bin/rackup ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rackup' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rack", "rackup")
data/bin/rubocop ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'rubocop' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("rubocop", "rubocop")
data/bin/ruby-parse ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'ruby-parse' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("parser", "ruby-parse")
data/bin/ruby-rewrite ADDED
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # This file was generated by Bundler.
6
+ #
7
+ # The application 'ruby-rewrite' is installed as part of a gem, and
8
+ # this file is here to facilitate running it.
9
+ #
10
+
11
+ require "pathname"
12
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
13
+ Pathname.new(__FILE__).realpath)
14
+
15
+ bundle_binstub = File.expand_path("../bundle", __FILE__)
16
+
17
+ if File.file?(bundle_binstub)
18
+ if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
19
+ load(bundle_binstub)
20
+ else
21
+ abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
22
+ Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
23
+ end
24
+ end
25
+
26
+ require "rubygems"
27
+ require "bundler/setup"
28
+
29
+ load Gem.bin_path("parser", "ruby-rewrite")
@@ -4,7 +4,7 @@ module RelatonIso
4
4
  # Hit.
5
5
  class Hit < RelatonBib::Hit
6
6
  # @return [RelatonIsoBib::IsoBibliographicItem]
7
- attr_accessor :fetch
7
+ attr_writer :fetch
8
8
 
9
9
  # Parse page.
10
10
  # @param lang [String, NilClass]
@@ -15,11 +15,12 @@ module RelatonIso
15
15
 
16
16
  # @return [Integer]
17
17
  def sort_weight
18
- case hit["publicationStatus"] && hit["publicationStatus"]["key"]
19
- when "ENT_ACTIVE" then 0
20
- when "ENT_PROGRESS" then 1
21
- when "ENT_INACTIVE" then 2
22
- else 3
18
+ case hit[:status] # && hit["publicationStatus"]["key"]
19
+ when "Published" then 0
20
+ when "Under development" then 1
21
+ when "Withdrawn" then 2
22
+ when "Deleted" then 3
23
+ else 4
23
24
  end
24
25
  end
25
26
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "algolia"
3
4
  require "relaton_iso/hit"
4
5
 
5
6
  module RelatonIso
@@ -16,18 +17,22 @@ module RelatonIso
16
17
  # @param lang [String, NilClass]
17
18
  # @return [RelatonIsoBib::IsoBibliographicItem]
18
19
  def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
19
- parts = @array.reject { |h| h.hit["docPart"]&.empty? }
20
- hit = parts.min_by { |h| h.hit["docPart"].to_i }
20
+ # parts = @array.reject { |h| h.hit["docPart"]&.empty? }
21
+ hit = @array.min_by do |h|
22
+ IsoBibliography.ref_components(h.hit[:title])[1].to_i
23
+ end
21
24
  return @array.first.fetch lang unless hit
22
25
 
23
26
  bibitem = hit.fetch lang
24
27
  all_parts_item = bibitem.to_all_parts
25
- parts.reject { |h| h.hit["docRef"] == hit.hit["docRef"] }.each do |hi|
28
+ @array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
29
+ %r{^(?<fr>ISO(?:\s|/)[^-/:()]+(?:-[\w-]+)?(?::\d{4})?
30
+ (?:/\w+(?:\s\w+)?\s\d+(?:\d{4})?)?)}x =~ hi.hit[:title]
26
31
  isobib = RelatonIsoBib::IsoBibliographicItem.new(
27
- formattedref: RelatonBib::FormattedRef.new(content: hi.hit["docRef"])
32
+ formattedref: RelatonBib::FormattedRef.new(content: fr),
28
33
  )
29
34
  all_parts_item.relation << RelatonBib::DocumentRelation.new(
30
- type: "instance", bibitem: isobib
35
+ type: "instance", bibitem: isobib,
31
36
  )
32
37
  end
33
38
  all_parts_item
@@ -49,8 +54,8 @@ module RelatonIso
49
54
 
50
55
  hash = YAML.safe_load resp.body
51
56
  bib_hash = RelatonIsoBib::HashConverter.hash_to_bib hash
52
- bib = RelatonIsoBib::IsoBibliographicItem.new **bib_hash
53
- hit = Hit.new({ "docRef" => text }, self)
57
+ bib = RelatonIsoBib::IsoBibliographicItem.new(**bib_hash)
58
+ hit = Hit.new({ title: text }, self)
54
59
  hit.fetch = bib
55
60
  [hit]
56
61
  end
@@ -61,21 +66,28 @@ module RelatonIso
61
66
  # @return [Array<RelatonIso::Hit>]
62
67
  #
63
68
  def fetch_iso # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
64
- %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?} =~ text
65
- http = Net::HTTP.new "www.iso.org", 443
66
- http.use_ssl = true
67
- search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
68
- search << "docNumber=#{num}"
69
- search << "docPartNo=#{part}" if part
70
- q = search.join "&"
71
- resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
72
- "Accept" => "application/json, text/plain, */*")
73
- return [] if resp.body.empty?
69
+ # %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?} =~ text
70
+ # http = Net::HTTP.new "www.iso.org", 443
71
+ # http.use_ssl = true
72
+ # search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
73
+ # search << "docNumber=#{num}"
74
+ # search << "docPartNo=#{part}" if part
75
+ # q = search.join "&"
76
+ # resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
77
+ # "Accept" => "application/json, text/plain, */*")
78
+ config = Algolia::Search::Config.new(application_id: "JCL49WV5AR", api_key: "dd1b9e1ab383f4d4817d29cd5e96d3f0")
79
+ client = Algolia::Search::Client.new config, logger: ::Logger.new($stderr)
80
+ index = client.init_index "all_en"
81
+ resp = index.search text, hitsPerPage: 100, filters: "category:standard"
82
+ # return [] if resp.body.empty?
74
83
 
75
- json = JSON.parse resp.body
76
- json["standards"].map { |h| Hit.new h, self }.sort! do |a, b|
77
- if a.sort_weight == b.sort_weight
78
- (parse_date(b.hit) - parse_date(a.hit)).to_i
84
+ # json = JSON.parse resp.body
85
+ # json["standards"]
86
+ resp[:hits].map { |h| Hit.new h, self }.sort! do |a, b|
87
+ if a.sort_weight == b.sort_weight && b.hit[:year] = a.hit[:year]
88
+ a.hit[:title] <=> b.hit[:title]
89
+ elsif a.sort_weight == b.sort_weight
90
+ b.hit[:year] - a.hit[:year]
79
91
  else
80
92
  a.sort_weight - b.sort_weight
81
93
  end
@@ -84,16 +96,16 @@ module RelatonIso
84
96
 
85
97
  # @param hit [Hash]
86
98
  # @return [Date]
87
- def parse_date(hit)
88
- if hit["publicationDate"]
89
- Date.strptime(hit["publicationDate"], "%Y-%m")
90
- elsif %r{:(?<year>\d{4})} =~ hit["docRef"]
91
- Date.strptime(year, "%Y")
92
- elsif hit["newProjectDate"]
93
- Date.parse hit["newProjectDate"]
94
- else
95
- Date.new 0
96
- end
97
- end
99
+ # def parse_date(hit)
100
+ # if hit["publicationDate"]
101
+ # Date.strptime(hit["publicationDate"], "%Y-%m")
102
+ # elsif %r{:(?<year>\d{4})} =~ hit["docRef"]
103
+ # Date.strptime(year, "%Y")
104
+ # elsif hit["newProjectDate"]
105
+ # Date.parse hit["newProjectDate"]
106
+ # else
107
+ # Date.new 0
108
+ # end
109
+ # end
98
110
  end
99
111
  end
@@ -15,8 +15,9 @@ module RelatonIso
15
15
  HitCollection.new text.gsub(/\u2013/, "-")
16
16
  rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
17
17
  EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
18
- Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
19
- raise RelatonBib::RequestError, "Could not access http://www.iso.org"
18
+ Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
19
+ Algolia::AlgoliaUnreachableHostError => e
20
+ raise RelatonBib::RequestError, e.message
20
21
  end
21
22
 
22
23
  # @param ref [String] the ISO standard Code to look up (e.g. "ISO 9000")
@@ -25,11 +26,15 @@ module RelatonIso
25
26
  # reference is required, :keep_year if undated reference should
26
27
  # return actual reference with year
27
28
  # @return [String] Relaton XML serialisation of reference
28
- def get(ref, year = nil, opts = {})
29
+ def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
29
30
  code = ref.gsub(/\u2013/, "-")
30
- %r{\s(?<num>\d+)(-(?<part>[\d-]+))?(:(?<year1>\d{4}))?} =~ code
31
+ # %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?(?::(?<year1>\d{4}))?} =~ code
32
+ _, _part, year1, = ref_components ref
31
33
  year ||= year1
32
- opts[:all_parts] ||= !part && opts[:all_parts].nil? # && code2.nil?
34
+ code.sub! " (all parts)", ""
35
+ opts[:all_parts] ||= $~ && opts[:all_parts].nil?
36
+ opts[:keep_year] ||= opts[:keep_year].nil?
37
+ # code.sub!("#{num}-#{part}", num) if opts[:all_parts] && part
33
38
  # if %r[^ISO/IEC DIR].match? code
34
39
  # return RelatonIec::IecBibliography.get(code, year, opts)
35
40
  # end
@@ -44,6 +49,16 @@ module RelatonIso
44
49
  end
45
50
  end
46
51
 
52
+ def ref_components(ref)
53
+ %r{
54
+ ^(?<code>ISO(?:\s|/)[^-/:()]+\d+)
55
+ (?:-(?<part>[\w-]+))?
56
+ (?::(?<year>\d{4}))?
57
+ (?:/(?<corr>\w+(?:\s\w+)?\s\d+)(?:(?<coryear>\d{4}))?)?
58
+ }x =~ ref
59
+ [code&.strip, part, year, corr, coryear]
60
+ end
61
+
47
62
  private
48
63
 
49
64
  # rubocop:disable Metrics/MethodLength
@@ -51,19 +66,19 @@ module RelatonIso
51
66
  def fetch_ref_err(code, year, missed_years)
52
67
  id = year ? "#{code}:#{year}" : code
53
68
  warn "[relaton-iso] WARNING: no match found online for #{id}. "\
54
- "The code must be exactly like it is on the standards website."
69
+ "The code must be exactly like it is on the standards website."
55
70
  unless missed_years.empty?
56
71
  warn "[relaton-iso] (There was no match for #{year}, though there "\
57
- "were matches found for #{missed_years.join(', ')}.)"
72
+ "were matches found for #{missed_years.join(', ')}.)"
58
73
  end
59
74
  if /\d-\d/.match? code
60
75
  warn "[relaton-iso] The provided document part may not exist, "\
61
- "or the document may no longer be published in parts."
76
+ "or the document may no longer be published in parts."
62
77
  else
63
78
  warn "[relaton-iso] If you wanted to cite all document parts for "\
64
- "the reference, use \"#{code} (all parts)\".\nIf the document is "\
65
- "not a standard, use its document type abbreviation (TS, TR, PAS, "\
66
- "Guide)."
79
+ "the reference, use \"#{code} (all parts)\".\nIf the document "\
80
+ "is not a standard, use its document type abbreviation "\
81
+ "(TS, TR, PAS, Guide)."
67
82
  end
68
83
  nil
69
84
  end
@@ -76,18 +91,20 @@ module RelatonIso
76
91
  # @param opts [Hash]
77
92
  # @return [Array<RelatonIso::Hit>]
78
93
  def isobib_search_filter(code, opts)
94
+ ref = remove_part code, opts[:all_parts]
79
95
  warn "[relaton-iso] (\"#{code}\") fetching..."
80
- result = search(code)
96
+ result = search(ref)
81
97
  res = search_code result, code, opts
82
98
  return res unless res.empty?
83
99
 
84
100
  # try stages
85
- if %r{^\w+/[^/]+\s\d+} =~ code # code like ISO/IEC 123, ISO/IEC/IEE 123
101
+ case code
102
+ when %r{^\w+/[^/]+\s\d+} # code like ISO/IEC 123, ISO/IEC/IEE 123
86
103
  res = try_stages(result, opts) do |st|
87
104
  code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
88
105
  end
89
106
  return res unless res.empty?
90
- elsif %r{^\w+\s\d+} =~ code # code like ISO 123
107
+ when %r{^\w+\s\d+} # code like ISO 123
91
108
  res = try_stages(result, opts) do |st|
92
109
  code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
93
110
  end
@@ -103,6 +120,12 @@ module RelatonIso
103
120
  end
104
121
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
105
122
 
123
+ def remove_part(ref, all_parts)
124
+ return ref unless all_parts
125
+
126
+ ref.sub %r{(\S+\s\d+)[\d-]+}, '\1'
127
+ end
128
+
106
129
  # @param result [RelatonIso::HitCollection]
107
130
  # @param opts [Hash]
108
131
  # @return [RelatonIso::HitCollection]
@@ -120,25 +143,15 @@ module RelatonIso
120
143
  # @param code [String]
121
144
  # @param opts [Hash]
122
145
  # @return [RelatonIso::HitCollection]
123
- def search_code(result, code, _opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
124
- code1, part1, corr1, coryear1 = ref_components code
146
+ def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
147
+ code1, part1, _, corr1, coryear1 = ref_components code
125
148
  result.select do |i|
126
- code2, part2, corr2, coryear2 = ref_components i.hit["docRef"]
127
- code1 == code2 && (!part1 || part1 == part2) &&
149
+ code2, part2, _, corr2, coryear2 = ref_components i.hit[:title]
150
+ code1 == code2 && (opts[:all_parts] && part2 || !opts[:all_parts] && part1 == part2) &&
128
151
  corr1 == corr2 && (!coryear1 || coryear1 == coryear2)
129
152
  end
130
153
  end
131
154
 
132
- def ref_components(ref)
133
- %r{
134
- ^(?<code>ISO(?:\s|/)[^-/:()]+)
135
- (?:-(?<part>[^:/]+))?
136
- (?::\d{4})?
137
- (?:/(?<corr>\w+(?:\s\w+)?\s\d+)(?:(?<coryear>\d{4}))?)?
138
- }x =~ ref
139
- [code&.strip, part, corr, coryear]
140
- end
141
-
142
155
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
143
156
 
144
157
  # Sort through the results from RelatonIso, fetching them three at a time,
@@ -151,7 +164,7 @@ module RelatonIso
151
164
  def isobib_results_filter(result, year, opts)
152
165
  missed_years = []
153
166
  hits = result.reduce!([]) do |hts, h|
154
- if !year || %r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit["docRef"] && iyear == year
167
+ if !year || %r{:(?<iyear>\d{4})(?!.*:\d{4})} =~ h.hit[:title] && iyear == year
155
168
  hts << h
156
169
  else
157
170
  missed_years << iyear
@@ -7,8 +7,7 @@ require "net/http"
7
7
 
8
8
  module RelatonIso
9
9
  # Scrapper.
10
- # rubocop:disable Metrics/ModuleLength
11
- module Scrapper
10
+ module Scrapper # rubocop:disable Metrics/ModuleLength
12
11
  DOMAIN = "https://www.iso.org"
13
12
 
14
13
  TYPES = {
@@ -55,9 +54,9 @@ module RelatonIso
55
54
  # @param lang [String, NilClass]
56
55
  # @return [Hash]
57
56
  def parse_page(hit_data, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
58
- path = "/contents/data/standard#{hit_data['splitPath']}/"\
59
- "#{hit_data['csnumber']}.html"
60
- doc, url = get_page path
57
+ # path = "/contents/data/standard#{hit_data['splitPath']}/"\
58
+ # "#{hit_data['csnumber']}.html"
59
+ doc, url = get_page "#{hit_data[:path].sub '/sites/isoorg', ''}.html"
61
60
 
62
61
  # Fetch edition.
63
62
  edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
@@ -67,24 +66,24 @@ module RelatonIso
67
66
 
68
67
  RelatonIsoBib::IsoBibliographicItem.new(
69
68
  fetched: Date.today.to_s,
70
- docid: fetch_docid(hit_data, langs),
69
+ docid: fetch_docid(doc, edition, langs),
71
70
  docnumber: fetch_docnumber(doc),
72
71
  edition: edition,
73
72
  language: langs.map { |l| l[:lang] },
74
73
  script: langs.map { |l| script(l[:lang]) }.uniq,
75
74
  title: titles,
76
- doctype: fetch_type(hit_data["docRef"]),
75
+ doctype: fetch_type(hit_data[:title]),
77
76
  docstatus: fetch_status(doc),
78
77
  ics: fetch_ics(doc),
79
- date: fetch_dates(doc, hit_data["docRef"]),
80
- contributor: fetch_contributors(hit_data["docRef"]),
78
+ date: fetch_dates(doc, hit_data[:title]),
79
+ contributor: fetch_contributors(hit_data[:title]),
81
80
  editorialgroup: fetch_workgroup(doc),
82
81
  abstract: abstract,
83
- copyright: fetch_copyright(hit_data["docRef"], doc),
82
+ copyright: fetch_copyright(doc),
84
83
  link: fetch_link(doc, url),
85
84
  relation: fetch_relations(doc),
86
85
  place: ["Geneva"],
87
- structuredidentifier: fetch_structuredidentifier(doc)
86
+ structuredidentifier: fetch_structuredidentifier(doc),
88
87
  )
89
88
  end
90
89
 
@@ -94,8 +93,7 @@ module RelatonIso
94
93
  # @param doc [Nokigiri::HTML::Document]
95
94
  # @param lang [String, NilClass]
96
95
  # @return [Array<Array>]
97
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
98
- def fetch_titles_abstract(doc, lang)
96
+ def fetch_titles_abstract(doc, lang) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
99
97
  titles = RelatonBib::TypedTitleStringCollection.new
100
98
  abstract = []
101
99
  langs = languages(doc, lang).reduce([]) do |s, l|
@@ -107,7 +105,11 @@ module RelatonIso
107
105
  titles += fetch_title(d, l[:lang])
108
106
 
109
107
  # Fetch abstracts.
110
- abstract_content = d.css("div[itemprop='description'] p").text
108
+ abstract_content = d.xpath(
109
+ "//div[@itemprop='description']/p|//div[@itemprop='description']/ul/li",
110
+ ).map do |a|
111
+ a.name == "li" ? "- #{a.text}" : a.text
112
+ end.reject(&:empty?).join("\n")
111
113
  unless abstract_content.empty?
112
114
  abstract << {
113
115
  content: abstract_content,
@@ -121,7 +123,6 @@ module RelatonIso
121
123
  end
122
124
  [titles, abstract, langs]
123
125
  end
124
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
125
126
 
126
127
  # Returns available languages.
127
128
  # @param doc [Nokogiri::HTML::Document]
@@ -131,7 +132,7 @@ module RelatonIso
131
132
  lgs = [{ lang: "en" }]
132
133
  doc.css("li#lang-switcher ul li a").each do |lang_link|
133
134
  lang_path = lang_link.attr("href")
134
- l = lang_path.match(%r{^\/(fr)\/})
135
+ l = lang_path.match(%r{^/(fr)/})
135
136
  lgs << { lang: l[1], path: lang_path } if l && (!lang || l[1] == lang)
136
137
  end
137
138
  lgs
@@ -168,67 +169,80 @@ module RelatonIso
168
169
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
169
170
 
170
171
  # Fetch docid.
171
- # @param hit [Hash]
172
+ # @param doc [Nokogiri:HTML::Document]
173
+ # @param edition [String]
172
174
  # @param langs [Array<Hash>]
173
175
  # @return [Array<RelatonBib::DocumentIdentifier>]
174
- def fetch_docid(hit, langs)
176
+ def fetch_docid(doc, edition, langs)
177
+ pubid = item_ref doc
175
178
  [
176
- RelatonBib::DocumentIdentifier.new(id: hit["docRef"], type: "ISO"),
177
- RelatonBib::DocumentIdentifier.new(id: fetch_urn(hit, langs),
178
- type: "URN"),
179
+ RelatonBib::DocumentIdentifier.new(id: pubid, type: "ISO"),
180
+ RelatonBib::DocumentIdentifier.new(
181
+ id: fetch_urn(doc, pubid, edition, langs), type: "URN",
182
+ ),
179
183
  ]
180
184
  end
181
185
 
182
- # @param hit [Hash]
186
+ # @param doc [Nokogiri:HTML::Document]
187
+ # @param pubid [String]
188
+ # @param edition [String]
183
189
  # @param langs [Array<Hash>]
184
190
  # @returnt [String]
185
- def fetch_urn(hit, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
186
- orig = hit["docRef"].split(" ").first.downcase.split("/").join "-"
187
- %r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ hit["docRef"]
191
+ def fetch_urn(doc, pubid, edition, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
192
+ orig = pubid.split.first.downcase.split("/").join "-"
193
+ %r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ pubid
194
+ _, part, _year, corr, = IsoBibliography.ref_components pubid
188
195
  urn = "urn:iso:std:#{orig}"
189
196
  urn += ":#{type.downcase}" if type
190
- urn += ":#{hit['docNumber']}"
191
- urn += ":-#{hit['docPart']}" if hit["docPart"] && !hit["docPart"].empty?
192
- urn += ":stage-#{hit['stageId']}"
193
- urn += ":ed-#{hit['docEdition']}" if hit["docEdition"]
194
- if hit["docElem"] && !hit["docElem"].empty? && hit["docElem"] != "0"
195
- urn += ":#{hit['docElem'].downcase}:#{hit['docElemSeq']}"
197
+ urn += ":#{fetch_docnumber(doc)}"
198
+ urn += ":-#{part}" if part
199
+ urn += ":stage-#{stage_code(doc)}"
200
+ urn += ":ed-#{edition}" if edition
201
+ if corr
202
+ corrparts = corr.split
203
+ urn += ":#{corrparts[0].downcase}:#{corrparts[-1]}"
196
204
  end
197
- urn += ":" + langs.map { |l| l[:lang] }.join(",")
205
+ urn += ":#{langs.map { |l| l[:lang] }.join(',')}"
198
206
  urn
199
207
  end
200
208
 
201
209
  def fetch_docnumber(doc)
202
- id = doc.at("//nav[contains(@class, 'heading-condensed')]/h1")&.text
203
- id&.match(/\d+/)&.to_s
210
+ item_ref(doc)&.match(/\d+/)&.to_s
204
211
  end
205
212
 
206
213
  # @param doc [Nokogiri::HTML::Document]
207
214
  def fetch_structuredidentifier(doc) # rubocop:disable Metrics/MethodLength
208
- item_ref = doc.at("//nav[contains(@class, 'heading-condensed')]/h1")
209
- unless item_ref
215
+ ref = item_ref doc
216
+ unless ref
210
217
  return RelatonIsoBib::StructuredIdentifier.new(
211
- project_number: "?", part_number: "", prefix: nil, id: "?"
218
+ project_number: "?", part_number: "", prefix: nil, id: "?",
212
219
  )
213
220
  end
214
221
 
215
- m = item_ref.text.match(/^(.*?\d+)-?((?<=-)\d+|)/)
222
+ m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
216
223
  RelatonIsoBib::StructuredIdentifier.new(
217
- project_number: m[1], part_number: m[2], prefix: nil,
218
- id: item_ref.text, type: "ISO"
224
+ project_number: m[1], part: m[2], type: "ISO",
219
225
  )
220
226
  end
221
227
 
228
+ def item_ref(doc)
229
+ doc.at("//nav[contains(@class, 'heading-condensed')]/h1")&.text
230
+ end
231
+
222
232
  # Fetch status.
223
233
  # @param doc [Nokogiri::HTML::Document]
224
234
  # @param status [String]
225
235
  # @return [Hash]
226
236
  def fetch_status(doc)
227
- stg, substg = doc.at("//ul[@class='dropdown-menu']/li[@class='active']/a/span[@class='stage-code']")
228
- .text.split "."
237
+ stg, substg = stage_code(doc).split "."
229
238
  RelatonBib::DocumentStatus.new(stage: stg, substage: substg)
230
239
  end
231
240
 
241
+ def stage_code(doc)
242
+ doc.at("//ul[@class='dropdown-menu']/li[@class='active']"\
243
+ "/a/span[@class='stage-code']").text
244
+ end
245
+
232
246
  # def stage(stg, substg)
233
247
  # abbr = STGABBR[stg].is_a?(Hash) ? STGABBR[stg][substg] : STGABBR[stg]
234
248
  # RelatonBib::DocumentStatus::Stage.new value: stg, abbreviation: abbr
@@ -241,13 +255,15 @@ module RelatonIso
241
255
  wg_link = doc.css("div.entry-name.entry-block a")[0]
242
256
  # wg_url = DOMAIN + wg_link['href']
243
257
  workgroup = wg_link.text.split "/"
258
+ type = workgroup[1]&.match(/^[A-Z]+/)&.to_s || "TC"
244
259
  {
245
260
  name: "International Organization for Standardization",
246
261
  abbreviation: "ISO",
247
262
  url: "www.iso.org",
248
263
  technical_committee: [{
249
- name: wg_link.text + doc.css("div.entry-title")[0].text,
250
- type: "TC",
264
+ name: doc.css("div.entry-title")[0].text,
265
+ identifier: wg_link.text,
266
+ type: type,
251
267
  number: workgroup[1]&.match(/\d+/)&.to_s&.to_i,
252
268
  }],
253
269
  }
@@ -274,10 +290,10 @@ module RelatonIso
274
290
  else
275
291
  a + r.css("a").map do |id|
276
292
  fref = RelatonBib::FormattedRef.new(
277
- content: id.text, format: "text/plain"
293
+ content: id.text, format: "text/plain",
278
294
  )
279
295
  bibitem = RelatonIsoBib::IsoBibliographicItem.new(
280
- formattedref: fref, date: date
296
+ formattedref: fref, date: date,
281
297
  )
282
298
  { type: type, bibitem: bibitem }
283
299
  end
@@ -311,7 +327,7 @@ module RelatonIso
311
327
  def fetch_title(doc, lang)
312
328
  content = doc.at(
313
329
  "//nav[contains(@class,'heading-condensed')]/h2 | "\
314
- "//nav[contains(@class,'heading-condensed')]/h3"
330
+ "//nav[contains(@class,'heading-condensed')]/h3",
315
331
  )&.text&.gsub(/\u2014/, "-")
316
332
  return RelatonBib::TypedTitleStringCollection.new unless content
317
333
 
@@ -373,7 +389,7 @@ module RelatonIso
373
389
  def fetch_ics(doc)
374
390
  doc.xpath("//strong[contains(text(), "\
375
391
  "'ICS')]/../following-sibling::dd/div/a").map do |i|
376
- code = i.text.match(/[\d\.]+/).to_s.split "."
392
+ code = i.text.match(/[\d.]+/).to_s.split "."
377
393
  { field: code[0], group: code[1], subgroup: code[2] }
378
394
  end
379
395
  end
@@ -395,10 +411,10 @@ module RelatonIso
395
411
  end
396
412
 
397
413
  # Fetch copyright.
398
- # @param ref [String]
399
414
  # @param doc [Nokogiri::HTML::Document]
400
415
  # @return [Array<Hash>]
401
- def fetch_copyright(ref, doc)
416
+ def fetch_copyright(doc)
417
+ ref = item_ref doc
402
418
  owner_name = ref.match(/.*?(?=\s)/).to_s
403
419
  from = ref.match(/(?<=:)\d{4}/).to_s
404
420
  if from.empty?
@@ -408,5 +424,4 @@ module RelatonIso
408
424
  end
409
425
  end
410
426
  end
411
- # rubocop:enable Metrics/ModuleLength
412
427
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "1.8.0"
4
+ VERSION = "1.9.0"
5
5
  end
data/relaton_iso.gemspec CHANGED
@@ -32,11 +32,15 @@ Gem::Specification.new do |spec|
32
32
  spec.add_development_dependency "pry-byebug"
33
33
  spec.add_development_dependency "rake", "~> 13.0"
34
34
  spec.add_development_dependency "rspec", "~> 3.0"
35
+ spec.add_development_dependency "rubocop"
36
+ spec.add_development_dependency "rubocop-performance"
37
+ spec.add_development_dependency "rubocop-rails"
35
38
  # spec.add_development_dependency "ruby-debug-ide"
36
39
  spec.add_development_dependency "simplecov"
37
40
  spec.add_development_dependency "vcr"
38
41
  spec.add_development_dependency "webmock"
39
42
 
40
43
  # spec.add_dependency "relaton-iec", "~> 1.8.0"
41
- spec.add_dependency "relaton-iso-bib", "~> 1.8.0"
44
+ spec.add_dependency "algolia"
45
+ spec.add_dependency "relaton-iso-bib", "~> 1.9.0"
42
46
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-17 00:00:00.000000000 Z
11
+ date: 2021-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug
@@ -80,6 +80,48 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '3.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubocop-performance
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rubocop-rails
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
83
125
  - !ruby/object:Gem::Dependency
84
126
  name: simplecov
85
127
  requirement: !ruby/object:Gem::Requirement
@@ -122,20 +164,34 @@ dependencies:
122
164
  - - ">="
123
165
  - !ruby/object:Gem::Version
124
166
  version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: algolia
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
125
181
  - !ruby/object:Gem::Dependency
126
182
  name: relaton-iso-bib
127
183
  requirement: !ruby/object:Gem::Requirement
128
184
  requirements:
129
185
  - - "~>"
130
186
  - !ruby/object:Gem::Version
131
- version: 1.8.0
187
+ version: 1.9.0
132
188
  type: :runtime
133
189
  prerelease: false
134
190
  version_requirements: !ruby/object:Gem::Requirement
135
191
  requirements:
136
192
  - - "~>"
137
193
  - !ruby/object:Gem::Version
138
- version: 1.8.0
194
+ version: 1.9.0
139
195
  description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
140
196
  model'
141
197
  email:
@@ -165,9 +221,13 @@ files:
165
221
  - bin/nokogiri
166
222
  - bin/pry
167
223
  - bin/racc
224
+ - bin/rackup
168
225
  - bin/rake
169
226
  - bin/rdebug-ide
170
227
  - bin/rspec
228
+ - bin/rubocop
229
+ - bin/ruby-parse
230
+ - bin/ruby-rewrite
171
231
  - bin/safe_yaml
172
232
  - bin/setup
173
233
  - lib/relaton_iso.rb