ituBib 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 33e1ba7e42b32d864a7a36d015bcb12f9598dfbc
4
+ data.tar.gz: 01d5acc567d8bc229db69efe0f491ddcf3cd9632
5
+ SHA512:
6
+ metadata.gz: 89b11fd0c276a4fc0dc2e6d8b1095e55d768309858d0a72fa3a4797572cbbbbfcb72f33276b985294a9f212aa2e78ff3d1e775a7befb2ec5747c140a14ad54ce
7
+ data.tar.gz: 5f0e981c92023e96c7400d04323f0dbfbb995c9f59d07d079d011598c7fcf99540b9a26473db74bfe55417e1e7a91e52fac46d3c491fe2f426cb09249a30a458
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ sudo: false
3
+ language: ruby
4
+ cache: bundler
5
+ rvm:
6
+ - 2.4.0
7
+ before_install: gem install bundler -v 2.0.1
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in itubib.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,80 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ ituBib (0.1.0)
5
+ iso-bib-item (~> 0.4.2)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ addressable (2.6.0)
11
+ public_suffix (>= 2.0.2, < 4.0)
12
+ byebug (11.0.0)
13
+ coderay (1.1.2)
14
+ crack (0.4.3)
15
+ safe_yaml (~> 1.0.0)
16
+ diff-lcs (1.3)
17
+ docile (1.3.1)
18
+ equivalent-xml (0.6.0)
19
+ nokogiri (>= 1.4.3)
20
+ hashdiff (0.3.8)
21
+ iso-bib-item (0.4.4)
22
+ isoics (~> 0.1.6)
23
+ nokogiri (~> 1.8.4)
24
+ ruby_deep_clone (~> 0.8.0)
25
+ isoics (0.1.7)
26
+ json (2.2.0)
27
+ method_source (0.9.2)
28
+ mini_portile2 (2.3.0)
29
+ nokogiri (1.8.5)
30
+ mini_portile2 (~> 2.3.0)
31
+ pry (0.12.2)
32
+ coderay (~> 1.1.0)
33
+ method_source (~> 0.9.0)
34
+ pry-byebug (3.7.0)
35
+ byebug (~> 11.0)
36
+ pry (~> 0.10)
37
+ public_suffix (3.0.3)
38
+ rake (10.5.0)
39
+ rspec (3.8.0)
40
+ rspec-core (~> 3.8.0)
41
+ rspec-expectations (~> 3.8.0)
42
+ rspec-mocks (~> 3.8.0)
43
+ rspec-core (3.8.0)
44
+ rspec-support (~> 3.8.0)
45
+ rspec-expectations (3.8.2)
46
+ diff-lcs (>= 1.2.0, < 2.0)
47
+ rspec-support (~> 3.8.0)
48
+ rspec-mocks (3.8.0)
49
+ diff-lcs (>= 1.2.0, < 2.0)
50
+ rspec-support (~> 3.8.0)
51
+ rspec-support (3.8.0)
52
+ ruby_deep_clone (0.8.0)
53
+ safe_yaml (1.0.5)
54
+ simplecov (0.16.1)
55
+ docile (~> 1.1)
56
+ json (>= 1.8, < 3)
57
+ simplecov-html (~> 0.10.0)
58
+ simplecov-html (0.10.2)
59
+ vcr (4.0.0)
60
+ webmock (3.5.1)
61
+ addressable (>= 2.3.6)
62
+ crack (>= 0.3.2)
63
+ hashdiff
64
+
65
+ PLATFORMS
66
+ ruby
67
+
68
+ DEPENDENCIES
69
+ bundler (~> 2.0)
70
+ equivalent-xml (~> 0.6)
71
+ ituBib!
72
+ pry-byebug
73
+ rake (~> 10.0)
74
+ rspec (~> 3.0)
75
+ simplecov
76
+ vcr
77
+ webmock
78
+
79
+ BUNDLED WITH
80
+ 2.0.1
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2019 Andrei Kislichenko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.adoc ADDED
@@ -0,0 +1,76 @@
1
+ = ItuBib: retrieve ITU Standards for bibliographic use using the BibliographicItem model
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/itubib`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ ItuBib is a Ruby gem that implements the https://github.com/riboseinc/isodoc-models#iso-bibliographic-item[IsoBibliographicItem model].
6
+
7
+ == Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ [source,ruby]
12
+ ----
13
+ gem 'itubib'
14
+ ----
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install itubib
23
+
24
+ == Usage
25
+
26
+ === Search for a standard using keywords
27
+
28
+ [source,ruby]
29
+ ----
30
+ require 'itubib'
31
+
32
+ hit_collection = ItuBib::ItuBibliography.search("ITU-T L.163")
33
+ => [<ItuBib::Hit:0x007f92b5d693c8 @text="ITU-T L.163" @fetched="false" @fullIdentifier="" @title="ITU-T L.163 (11/2018)">,
34
+ <ItuBib::Hit:0x007f92b5d69350 @text="ITU-T L.163" @fetched="false" @fullIdentifier="" @title="ITU-T Z.161 (10/2018)">]
35
+ ...
36
+
37
+ item = hit_collection[1].fetch
38
+ => #<IsoBibItem::IsoBibliographicItem:0x007f92b5c723c0
39
+ ...
40
+ ----
41
+
42
+ === XML serialization
43
+ [source,ruby]
44
+ ----
45
+ item.to_xml
46
+ => "<bibitem type=\"international-standard\" id=\"ITU-TZ.161(10/2018)\">
47
+ <fetched>2019-03-01</fetched>
48
+ <title format=\"text/plain\" language=\"en\" script=\"Latn\">
49
+ ITU-T Z.161 (10/2018): Testing and Test Control Notation version 3: TTCN-3 core language
50
+ </title>
51
+ ...
52
+ </bibitem>"
53
+ ----
54
+
55
+ === Get a standard by code and year
56
+ [source,ruby]
57
+ ----
58
+ ItuBib::ItuBibliography.get("ITU-T L.163", "2018", {})
59
+ fetching ITU-T L.163...
60
+ => #<IsoBibItem::IsoBibliographicItem:0x007f92b5d8bc20
61
+ ...
62
+ ----
63
+
64
+ == Development
65
+
66
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
67
+
68
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to https://rubygems.org[rubygems.org].
69
+
70
+ == Contributing
71
+
72
+ Bug reports and pull requests are welcome on GitHub at https://github.com/riboseinc/itubib.
73
+
74
+ == License
75
+
76
+ The gem is available as open source under the terms of the https://opensource.org/licenses/MIT[MIT License].
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Define the `spec` task and make it the default rake target.
RSpec::Core::RakeTask.new(:spec)

task default: :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "itubib"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/itubib.gemspec ADDED
@@ -0,0 +1,38 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "itubib/version"
5
+
6
# Gem packaging metadata for ituBib.
Gem::Specification.new do |spec|
  spec.name          = "ituBib"
  spec.version       = ItuBib::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  spec.summary       = "ItuBib: retrieve ITU Standards for bibliographic use "\
                       "using the BibliographicItem model"
  spec.description   = "ItuBib: retrieve ITU Standards for bibliographic use "\
                       "using the BibliographicItem model"
  spec.homepage      = "https://github.com/riboseinc/itubib"
  spec.license       = "MIT"

  # The library calls Net::HTTP.post, which is only available from Ruby 2.4,
  # so refuse installation on older interpreters instead of failing at runtime.
  spec.required_ruby_version = ">= 2.4.0"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency 'equivalent-xml', '~> 0.6'
  spec.add_development_dependency 'pry-byebug'
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency 'simplecov'
  spec.add_development_dependency 'vcr'
  spec.add_development_dependency 'webmock'

  spec.add_dependency 'iso-bib-item', '~> 0.4.2'
end
data/lib/itubib.rb ADDED
@@ -0,0 +1,12 @@
1
+ require "itubib/version"
2
+ require 'itubib/itu_bibliography'
3
+
4
# Hook the ITU processor into Relaton, but only when Relaton is already loaded.
if defined?(Relaton)
  require_relative 'relaton/processor'
  registry = Relaton::Registry.instance
  registry.register(Relaton::ItuBib::Processor)
end
8
+
9
module ItuBib
  # Gem-specific error class; a direct subclass of StandardError.
  Error = Class.new(StandardError)
end
data/lib/itubib/hit.rb ADDED
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module ItuBib
  # One search result. Wraps the raw hit data and lazily resolves it into a
  # bibliographic item via Scrapper.parse_page.
  class Hit
    # @return [ItuBib::HitCollection, nil] collection this hit belongs to
    attr_reader :hit_collection

    # @return [Hash] raw hit data; this class reads the :code key
    attr_reader :hit

    # @param hit [Hash] raw hit data
    # @param hit_collection [ItuBib::HitCollection, nil]
    def initialize(hit, hit_collection = nil)
      @hit = hit
      @hit_collection = hit_collection
      # Set explicitly so #inspect never reads an unassigned instance variable.
      @fetch = nil
    end

    # Parse the hit's page. The result is memoized, so the page is scraped
    # at most once per hit.
    # @return [Object] whatever Scrapper.parse_page returns for this hit
    def fetch
      @fetch ||= Scrapper.parse_page @hit
    end

    # @return [String] same text as #inspect
    def to_s
      inspect
    end

    # @return [String] short summary; does not trigger a fetch
    def inspect
      "<#{self.class}:#{format('%#.14x', object_id << 1)} "\
      "@text=\"#{@hit_collection&.text}\" "\
      "@fetched=\"#{!@fetch.nil?}\" "\
      "@fullIdentifier=\"#{@fetch&.shortref(nil)}\" "\
      "@title=\"#{@hit[:code]}\">"
    end

    # Serialize the fetched item to XML (fetches it first if necessary).
    # @param opts [Hash] passed through to the item's to_xml
    # @return [String] XML of the builder's root element
    def to_xml(opts = {})
      builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
        fetch.to_xml xml, opts
      end
      builder.doc.root.to_xml
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'itubib/hit'
4
+ require "addressable/uri"
5
+ require 'net/http'
6
+
7
module ItuBib
  # Array of Hit objects produced by one query against the ITU search service.
  class HitCollection < Array
    DOMAIN = 'https://www.itu.int'

    # @return [TrueClass, FalseClass] true once #fetch has completed
    attr_reader :fetched

    # @return [String] the reference that was searched for
    attr_reader :text

    # @return [String, nil] the year passed to the constructor
    attr_reader :year

    # Query the ITU search service and fill the collection with one Hit per
    # returned result.
    #
    # The year is only stored on the collection; it is not part of the
    # request sent to the search service.
    #
    # @param ref_nbr [String] reference to search for
    # @param year [String, nil]
    def initialize(ref_nbr, year = nil)
      @text = ref_nbr
      @year = year
      url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search"
      # The search parameters are JSON-encoded and wrapped in a "json" field
      # of a second JSON document, exactly as before.
      data = { json: search_params(ref_nbr).to_json }
      resp = Net::HTTP.post(URI(url), data.to_json, 'Content-Type' => 'application/json')
      doc = JSON.parse resp.body
      hits = doc['results'].map do |h|
        Hit.new({ code: h['Media']['Name'], title: h['Title'], url: h['Redirection'] }, self)
      end
      concat hits
      @fetched = false
    end

    # Fetch every hit through a pool of 4 workers.
    # @return [ItuBib::HitCollection] self
    def fetch
      workers = WorkersPool.new 4
      workers.worker(&:fetch)
      each do |hit|
        workers << hit
      end
      workers.end
      workers.result
      @fetched = true
      self
    end

    # @return [String] same text as #inspect
    def to_s
      inspect
    end

    # @return [String]
    def inspect
      "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
    end

    private

    # Build the search request parameters for the given reference.
    # @param ref_nbr [String] search input (previously hard-coded to "163")
    # @return [Hash]
    def search_params(ref_nbr)
      {
        "Input" => ref_nbr,
        "Start" => 0,
        "Rows" => 10,
        "SortBy" => "RELEVANCE",
        "ExactPhrase" => false,
        "CollectionName" => "General",
        "CollectionGroup" => "Recommendations",
        "Sector" => "t",
        "Criterias" => [{
          "Name" => "Search in",
          "Criterias" => [
            { "Selected" => false, "Value" => "", "Label" => "Name", "Target" => "/name_s", "TypeName" => "CHECKBOX", "GetCriteriaType" => 0 },
            { "Selected" => false, "Value" => "", "Label" => "Short description", "Target" => "/short_description_s", "TypeName" => "CHECKBOX", "GetCriteriaType" => 0 },
            { "Selected" => false, "Value" => "", "Label" => "File content", "Target" => "/file", "TypeName" => "CHECKBOX", "GetCriteriaType" => 0 }
          ],
          "ShowCheckbox" => true,
          "Selected" => false
        }],
        "Topics" => "",
        "ClientData" => { "ip" => "" },
        "Language" => "en",
        "IP" => "",
        "SearchType" => "All"
      }
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ # require 'isobib/iso_bibliographic_item'
4
+ require 'itubib/scrapper'
5
+ require 'itubib/hit_collection'
6
+ require "date"
7
+
8
module ItuBib
  # Class methods for searching ITU documents and retrieving one by code.
  class ItuBibliography
    class << self
      # Search for documents matching the text.
      # Any StandardError raised while building the collection is reported
      # with a warning and turned into an empty result.
      # @param text [String]
      # @param year [String, nil]
      # @return [ItuBib::HitCollection, Array] empty Array on failure
      def search(text, year = nil)
        HitCollection.new text, year
      rescue StandardError
        warn "Could not access http://www.itu.int"
        []
      end

      # Look up a single document.
      # @param code [String] the document code to look up (e.g. "ITU-T L.163")
      # @param year [String, nil] the year the document was published (optional)
      # @param opts [Hash] options; this method reads :all_parts and :keep_year
      # @return [Object, nil] the item obtained from the matching hit's #fetch,
      #   or nil when nothing matched
      def get(code, year = nil, opts = {})
        code += '-1' if opts[:all_parts]
        ret = itubib_get1(code, year, opts)
        return nil if ret.nil?

        # NOTE(review): the return values of the two calls below are
        # discarded; confirm that they modify the item in place.
        ret.to_most_recent_reference unless year || opts[:keep_year]
        ret.to_all_parts if opts[:all_parts]
        ret
      end

      private

      # Emit warnings explaining why no match was found.
      # @param code [String]
      # @param year [String, nil]
      # @param missed_years [Array] years that were found but did not match
      # @return [nil]
      def fetch_ref_err(code, year, missed_years)
        id = year ? "#{code}:#{year}" : code
        warn "WARNING: no match found online for #{id}. "\
          "The code must be exactly like it is on the standards website."
        unless missed_years.empty?
          warn "(There was no match for #{year}, though there were matches "\
            "found for #{missed_years.join(', ')}.)"
        end
        if /\d-\d/ =~ code
          warn "The provided document part may not exist, or the document "\
            "may no longer be published in parts."
        else
          warn "If you wanted to cite all document parts for the reference, "\
            "use \"#{code} (all parts)\".\nIf the document is not a standard, "\
            "use its document type abbreviation (TS, TR, PAS, Guide)."
        end
        nil
      end

      # Fetch the given hits through a worker pool and return the fetched
      # items in the same order as the hits.
      # @param s [Array<ItuBib::Hit>]
      # @param n [Integer] number of workers requested
      # @return [Array]
      def fetch_pages(s, n)
        workers = WorkersPool.new n
        workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
        s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
        workers.end
        # Workers finish in arbitrary order; restore the input order.
        workers.result.sort_by { |x| x[:i] }.map { |x| x[:hit] }
      end

      # Search for the code and keep only hits whose code starts with
      # exactly that "ITU-T <id>" reference.
      # @param code [String]
      # @return [Array<ItuBib::Hit>]
      def search_filter(code)
        docidrx = %r{^ITU-T\s[^\s]+}
        warn "fetching #{code}..."
        search(code).select do |i|
          i.hit[:code] && i.hit[:code].match(docidrx).to_s == code
        end
      end

      # Go through the hits three at a time, fetching each slice, and return
      # the first item that matches the year (or simply the first item when
      # no year is given). If nothing matches, return the published years
      # that were seen, for error reporting.
      # @param result [Array<ItuBib::Hit>]
      # @param year [String, nil]
      # @return [Hash] { ret: item } on success, { years: [...] } otherwise
      def isobib_results_filter(result, year)
        missed_years = []
        result.each_slice(3) do |slice|
          fetch_pages(slice, 3).each do |item|
            return { ret: item } unless year

            item.dates.select { |d| d.type == "published" }.each do |d|
              return { ret: item } if year.to_i == d.on.year

              missed_years << d.on.year
            end
          end
        end
        { years: missed_years }
      end

      # @param code [String]
      # @param year [String, nil]
      # @param _opts [Hash] currently unused
      # @return [Object, nil] the matching item, or nil after warning
      def itubib_get1(code, year, _opts)
        result = search_filter(code)
        return nil unless result

        ret = isobib_results_filter(result, year)
        return ret[:ret] if ret[:ret]

        fetch_ref_err(code, year, ret[:years])
      end
    end
  end
end
@@ -0,0 +1,292 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'iso_bib_item'
4
+ require 'itubib/hit'
5
+ require 'nokogiri'
6
+ require 'net/http'
7
+ require 'itubib/workers_pool'
8
+
9
+ # Capybara.request_driver :poltergeist do |app|
10
+ # Capybara::Poltergeist::Driver.new app, js_errors: false
11
+ # end
12
+ # Capybara.default_driver = :poltergeist
13
+
14
module ItuBib
  # Scrapper: turns an ITU document page into a bibliographic item.
  # rubocop:disable Metrics/ModuleLength
  module Scrapper
    DOMAIN = 'https://www.itu.int'

    TYPES = {
      'ISO' => 'international-standard',
      'TS' => 'technicalSpecification',
      'TR' => 'technicalReport',
      'PAS' => 'publiclyAvailableSpecification',
      'AWI' => 'appruvedWorkItem',
      'CD' => 'committeeDraft',
      'FDIS' => 'finalDraftInternationalStandard',
      'NP' => 'newProposal',
      'DIS' => 'draftInternationalStandard',
      'WD' => 'workingDraft',
      'R' => 'recommendation',
      'Guide' => 'guide'
    }.freeze

    # HTTP status codes that get_page follows as redirects.
    REDIRECT_CODES = %w[301 302 303].freeze

    # Maximum number of consecutive redirects get_page follows.
    MAX_REDIRECTS = 10

    class << self
      # Parse page.
      # @param hit_data [Hash] hit data; reads :url, :code and :title
      # @return [IsoBibItem::IsoBibliographicItem]
      # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      def parse_page(hit_data)
        doc = get_page hit_data[:url]

        # Fetch edition.
        edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b").text

        IsoBibItem::IsoBibliographicItem.new(
          docid: fetch_docid(hit_data[:code]),
          edition: edition,
          language: ['en'],
          script: ['Latn'],
          titles: fetch_titles(hit_data),
          type: fetch_type(doc),
          docstatus: fetch_status(doc),
          ics: [], # ICS codes are not extracted; an empty list is passed.
          dates: fetch_dates(doc),
          contributors: fetch_contributors(hit_data[:code]),
          workgroup: fetch_workgroup(doc),
          abstract: fetch_abstract(doc),
          copyright: fetch_copyright(hit_data[:code], doc),
          link: fetch_link(doc, hit_data[:url]),
          relations: fetch_relations(doc)
        )
      end
      # rubocop:enable Metrics/AbcSize, Metrics/MethodLength

      private

      # Fetch abstracts. Downloads the page whose URL is embedded in the
      # element's onclick attribute and joins its 'p.MsoNormal' text.
      # @param doc [Nokogiri::HTML::Document]
      # @return [Array<Hash>] empty when the page has no abstract element
      def fetch_abstract(doc)
        abstract_url = doc.at('//table/tr/td/span[contains(@id, "lbl_dms")]/div')
        return [] unless abstract_url

        url = abstract_url[:onclick].match(/https?[^']+/).to_s
        d = Nokogiri::HTML Net::HTTP.get(URI(url))
        # Drop line breaks, collapse runs of whitespace, remove non-breaking spaces.
        abstract_content = d.css('p.MsoNormal').text.gsub(/\r\n/, '')
                            .gsub(/\s{2,}/, ' ').gsub(/\u00a0/, '')

        [{
          content: abstract_content,
          language: 'en',
          script: 'Latn'
        }]
      end

      # Get page, following 301/302/303 redirects.
      # @param url [String] page's URL
      # @return [Nokogiri::HTML::Document]
      # @raise [RuntimeError] when more than MAX_REDIRECTS redirects are
      #   encountered (previously this looped without bound)
      def get_page(url)
        uri = URI url
        resp = Net::HTTP.get_response(uri)
        redirects = 0
        while REDIRECT_CODES.include?(resp.code)
          redirects += 1
          raise "Too many redirects while fetching #{url}" if redirects > MAX_REDIRECTS

          uri = URI resp['location']
          resp = Net::HTTP.get_response(uri)
        end
        Nokogiri::HTML(resp.body)
      end

      # Fetch docid.
      # @param code [String] document code, e.g. "ITU-T L.163 (11/2018)"
      # @return [Hash]
      def fetch_docid(code)
        # The project number is the first whitespace-delimited token after
        # the first whitespace in the code.
        m = code.match(/(?<=\s)(?<project>[^\s]+)-?(?<part>(?<=-)\d+|)-?(?<subpart>(?<=-)\d+|)/)
        {
          project_number: m[:project],
          part_number: m[:part],
          subpart_number: m[:subpart],
          prefix: nil,
          type: 'ITU',
          id: code
        }
      end

      # Fetch status. 'In force' maps to Published (60/60); any other text
      # maps to Withdrawal (95/99).
      # @param doc [Nokogiri::HTML::Document]
      # @return [Hash]
      def fetch_status(doc)
        s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]").text
        if s == 'In force'
          status = 'Published'
          stage = '60'
          substage = '60'
        else
          status = 'Withdrawal'
          stage = '95'
          substage = '99'
        end
        { status: status, stage: stage, substage: substage }
      end

      # Fetch workgroup.
      # @param doc [Nokogiri::HTML::Document]
      # @return [Hash]
      def fetch_workgroup(doc)
        wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a').text
        { name: 'International Telecommunication Union',
          abbreviation: 'ITU',
          url: 'www.itu.int',
          technical_committee: {
            name: wg,
            type: 'technicalCommittee',
            # First run of digits in the group name, or nil when there is none.
            number: wg.match(/\d+/)&.to_s&.to_i
          } }
      end

      # Fetch relations.
      # @param doc [Nokogiri::HTML::Document]
      # @return [Array<Hash>]
      # rubocop:disable Metrics/MethodLength
      def fetch_relations(doc)
        doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
          r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
          type = case r_type
                 when 'in force' then 'published'
                 else r_type
                 end
          ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
          url = DOMAIN + ref[:href].sub(/^\./, '/ITU-T/recommendations')
          { type: type, identifier: ref.text, url: url }
        end
      end
      # rubocop:enable Metrics/MethodLength

      # Fetch type. Always 'international-standard'; the page is not inspected.
      # @param _doc [Nokogiri::HTML::Document] unused
      # @return [String]
      def fetch_type(_doc)
        'international-standard'
      end

      # Fetch titles. The title is split on ' - ' into intro, main and part.
      # A missing (nil) title is treated like an empty one.
      # @param hit_data [Hash] reads :title
      # @return [Array<Hash>]
      def fetch_titles(hit_data)
        titles = hit_data[:title].to_s.split ' - '
        case titles.size
        when 0
          intro, main, part = nil, "", nil
        when 1
          intro, main, part = nil, titles[0], nil
        when 2
          if /^(Part|Partie) \d+:/ =~ titles[1]
            intro, main, part = nil, titles[0], titles[1]
          else
            intro, main, part = titles[0], titles[1], nil
          end
        when 3
          intro, main, part = titles[0], titles[1], titles[2]
        else
          intro, main, part = titles[0], titles[1], titles[2..-1]&.join(" -- ")
        end
        [{
          title_intro: intro,
          title_main: main,
          title_part: part,
          language: 'en',
          script: 'Latn'
        }]
      end

      # Fetch dates
      # @param doc [Nokogiri::HTML::Document]
      # @return [Array<Hash>] one 'published' entry, or empty when the text is blank
      def fetch_dates(doc)
        dates = []
        publish_date = doc.at("//table/tr/td/span[contains(@id, 'Label5')]").text
        unless publish_date.empty?
          dates << { type: 'published', on: publish_date }
        end
        dates
      end

      # Fetch contributors
      # @param code [String] document code
      # @return [Array<Hash>]
      def fetch_contributors(code)
        # Strip everything from "-<letter><space>" onwards, e.g. "ITU-T L.163" -> "ITU".
        abbrev = code.sub(/-\w\s.*/, '')
        case abbrev
        when 'ITU'
          name = 'International Telecommunication Union'
          url = 'www.itu.int'
        end
        [{ entity: { name: name, url: url, abbreviation: abbrev }, roles: ['publisher'] }]
      end

      # Fetch links.
      # @param doc [Nokogiri::HTML::Document]
      # @param url [String]
      # @return [Array<Hash>]
      def fetch_link(doc, url)
        links = [{ type: 'src', content: url }]
        obp_elms = doc.at('//table/tr/td/span[contains(@id, "Label4")]/a')
        links << { type: 'obp', content: DOMAIN + obp_elms[:href] } if obp_elms
        links
      end

      # Fetch copyright.
      # @param code [String]
      # @param doc [Nokogiri::HTML::Document]
      # @return [Hash]
      def fetch_copyright(code, doc)
        # Everything before the first '-' in the code.
        abbreviation = code.match(/^[^-]+/).to_s
        case abbreviation
        when 'ITU'
          name = 'International Telecommunication Union'
          url = 'www.itu.int'
        end
        from = doc.at("//table/tr/td/span[contains(@id, 'Label5')]").text
        { owner: { name: name, abbreviation: abbreviation, url: url }, from: from }
      end
    end
  end
  # rubocop:enable Metrics/ModuleLength
end
@@ -0,0 +1,3 @@
1
module ItuBib
  # Gem release number.
  VERSION = '0.1.0'
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
# Workers pool: a fixed number of threads that take items from a bounded
# queue, run a block on each one and collect the block's return values.
#
# Usage order: #worker (starts the threads), #<< for every item, #end,
# then #result.
class WorkersPool
  attr_accessor :nb_hits

  # @param num_workers [Integer] requested number of threads; values below 2
  #   are raised to 2
  def initialize(num_workers = 2)
    @num_workers = [num_workers, 2].max
    # Size the queue from the clamped count so that 0 or a negative request
    # cannot produce an invalid queue size.
    @queue = SizedQueue.new(@num_workers * 2)
    @result = []
    @threads = []
    @lock = Mutex.new
    @nb_hits = 0
  end

  # Start the worker threads. Each thread pops items until it receives the
  # :END marker. Without a block, items are consumed and discarded.
  # @yieldparam item [Object] an item pushed with #<<
  # @return [Array<Thread>]
  def worker(&block)
    @threads = Array.new(@num_workers) do
      Thread.new do
        until (item = @queue.pop) == :END
          if block
            value = block.call(item)
            # Several threads append to the same array.
            @lock.synchronize { @result << value }
          end
        end
      end
    end
  end

  # Wait for all worker threads and return the collected values, in
  # completion order. Returns an empty array if #worker was never called.
  # @return [Array]
  def result
    @threads.each(&:join)
    @result
  end

  # Enqueue an item; blocks while the queue is full.
  # @param item [Object]
  # @return [WorkersPool] self
  def <<(item)
    @queue << item
    self
  end

  # Signal the end of input by queueing one :END marker per worker.
  def end
    @num_workers.times { @queue << :END }
  end

  # @return [Integer] number of values collected so far
  def size
    @result.size
  end
end
@@ -0,0 +1,23 @@
1
+ require "relaton/processor"
2
+
3
module Relaton
  module ItuBib
    # Relaton processor for ITU references. Lookups are delegated to
    # ::ItuBib::ItuBibliography and XML parsing to IsoBibItem::XMLParser.
    class Processor < Relaton::Processor
      def initialize
        @short = :itubib
        @prefix = "ITU"
        @defaultprefix = %r{^(ITU)}
        @idtype = "ITU"
      end

      # @param code [String]
      # @param date [String, nil]
      # @param opts [Hash]
      # @return [Object, nil] result of ::ItuBib::ItuBibliography.get
      def get(code, date, opts)
        # The constant was previously misspelled as ItuBliography, which
        # raised NameError on every call.
        ::ItuBib::ItuBibliography.get(code, date, opts)
      end

      # @param xml [String]
      # @return [Object] result of IsoBibItem::XMLParser.from_xml
      def from_xml(xml)
        IsoBibItem::XMLParser.from_xml xml
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,191 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ituBib
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ribose Inc.
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-03-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: equivalent-xml
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.6'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pry-byebug
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: vcr
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: webmock
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: iso-bib-item
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 0.4.2
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 0.4.2
139
+ description: 'ItuBib: retrieve ITU Standards for bibliographic use using the BibliographicItem
140
+ model'
141
+ email:
142
+ - open.source@ribose.com
143
+ executables: []
144
+ extensions: []
145
+ extra_rdoc_files: []
146
+ files:
147
+ - ".gitignore"
148
+ - ".rspec"
149
+ - ".travis.yml"
150
+ - Gemfile
151
+ - Gemfile.lock
152
+ - LICENSE.txt
153
+ - README.adoc
154
+ - Rakefile
155
+ - bin/console
156
+ - bin/setup
157
+ - itubib.gemspec
158
+ - lib/itubib.rb
159
+ - lib/itubib/hit.rb
160
+ - lib/itubib/hit_collection.rb
161
+ - lib/itubib/itu_bibliography.rb
162
+ - lib/itubib/scrapper.rb
163
+ - lib/itubib/version.rb
164
+ - lib/itubib/workers_pool.rb
165
+ - lib/relaton/processor.rb
166
+ homepage: https://github.com/riboseinc/itubib
167
+ licenses:
168
+ - MIT
169
+ metadata: {}
170
+ post_install_message:
171
+ rdoc_options: []
172
+ require_paths:
173
+ - lib
174
+ required_ruby_version: !ruby/object:Gem::Requirement
175
+ requirements:
176
+ - - ">="
177
+ - !ruby/object:Gem::Version
178
+ version: '0'
179
+ required_rubygems_version: !ruby/object:Gem::Requirement
180
+ requirements:
181
+ - - ">="
182
+ - !ruby/object:Gem::Version
183
+ version: '0'
184
+ requirements: []
185
+ rubyforge_project:
186
+ rubygems_version: 2.6.12
187
+ signing_key:
188
+ specification_version: 4
189
+ summary: 'ItuBib: retrieve ITU Standards for bibliographic use using the BibliographicItem
190
+ model'
191
+ test_files: []