relaton-itu 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a69976e4fa5be5f658de58234c3b2470702342f5
4
+ data.tar.gz: b8784a5c2fe9a57e3eea2ed104de573365b5be0f
5
+ SHA512:
6
+ metadata.gz: 2fa691cc886fbcae97369fa4d61e016155194b820642bdc26554caa7639b1fa04a6c641f2b7a6ea7048afb36761cf6b12d32304a7a609a727f8095e1d1d4ac22
7
+ data.tar.gz: 2dae694690fd59635e27b9e7da5bd507dcbc49bdf06842cbdc24d893a2b1182e4432d92321ee368a4c3a111e78ef27a0a524ef1a39dbed2f9e98a3ea18a298dc
@@ -0,0 +1,13 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ .vscode/
10
+ .rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-master-ci-rubocop-yml
11
+
12
+ # rspec failure tracking
13
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,10 @@
1
+ # This project follows the Ribose OSS style guide.
2
+ # https://github.com/riboseinc/oss-guides
3
+ # All project-specific additions and overrides should be specified in this file.
4
+
5
+ inherit_from:
6
+ - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
7
+ AllCops:
8
+ TargetRubyVersion: 2.3
9
+ Rails:
10
+ Enabled: true
@@ -0,0 +1,15 @@
1
+ language: ruby
2
+ cache: bundler
3
+ os:
4
+ - linux
5
+ - osx
6
+ rvm:
7
+ - 2.5
8
+ - 2.4
9
+ - ruby-head
10
+ before_install:
11
+ - gem install bundler -v 2.0.1
12
+ - bundle update
13
+ matrix:
14
+ allow_failures:
15
+ - rvm: ruby-head
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ gem 'vcr', git: 'https://github.com/vcr/vcr.git', ref: 'a0bb140184e8518baad640a705e04155911bf31f'
4
+
5
+ # Specify your gem's dependencies in relaton_itu.gemspec
6
+ gemspec
@@ -0,0 +1,93 @@
1
+ GIT
2
+ remote: https://github.com/vcr/vcr.git
3
+ revision: a0bb140184e8518baad640a705e04155911bf31f
4
+ ref: a0bb140184e8518baad640a705e04155911bf31f
5
+ specs:
6
+ vcr (4.0.0)
7
+
8
+ PATH
9
+ remote: .
10
+ specs:
11
+ relaton-itu (0.1.0)
12
+ iso-bib-item (~> 0.4.2)
13
+
14
+ GEM
15
+ remote: https://rubygems.org/
16
+ specs:
17
+ addressable (2.6.0)
18
+ public_suffix (>= 2.0.2, < 4.0)
19
+ byebug (11.0.1)
20
+ coderay (1.1.2)
21
+ crack (0.4.3)
22
+ safe_yaml (~> 1.0.0)
23
+ debase (0.2.2)
24
+ debase-ruby_core_source (>= 0.10.2)
25
+ debase-ruby_core_source (0.10.4)
26
+ diff-lcs (1.3)
27
+ docile (1.3.1)
28
+ equivalent-xml (0.6.0)
29
+ nokogiri (>= 1.4.3)
30
+ hashdiff (0.3.8)
31
+ iso-bib-item (0.4.5)
32
+ isoics (~> 0.1.6)
33
+ nokogiri (~> 1.8.4)
34
+ ruby_deep_clone (~> 0.8.0)
35
+ isoics (0.1.7)
36
+ json (2.2.0)
37
+ method_source (0.9.2)
38
+ mini_portile2 (2.3.0)
39
+ nokogiri (1.8.5)
40
+ mini_portile2 (~> 2.3.0)
41
+ pry (0.12.2)
42
+ coderay (~> 1.1.0)
43
+ method_source (~> 0.9.0)
44
+ pry-byebug (3.7.0)
45
+ byebug (~> 11.0)
46
+ pry (~> 0.10)
47
+ public_suffix (3.0.3)
48
+ rake (10.5.0)
49
+ rspec (3.8.0)
50
+ rspec-core (~> 3.8.0)
51
+ rspec-expectations (~> 3.8.0)
52
+ rspec-mocks (~> 3.8.0)
53
+ rspec-core (3.8.0)
54
+ rspec-support (~> 3.8.0)
55
+ rspec-expectations (3.8.2)
56
+ diff-lcs (>= 1.2.0, < 2.0)
57
+ rspec-support (~> 3.8.0)
58
+ rspec-mocks (3.8.0)
59
+ diff-lcs (>= 1.2.0, < 2.0)
60
+ rspec-support (~> 3.8.0)
61
+ rspec-support (3.8.0)
62
+ ruby-debug-ide (0.6.1)
63
+ rake (>= 0.8.1)
64
+ ruby_deep_clone (0.8.0)
65
+ safe_yaml (1.0.5)
66
+ simplecov (0.16.1)
67
+ docile (~> 1.1)
68
+ json (>= 1.8, < 3)
69
+ simplecov-html (~> 0.10.0)
70
+ simplecov-html (0.10.2)
71
+ webmock (3.5.1)
72
+ addressable (>= 2.3.6)
73
+ crack (>= 0.3.2)
74
+ hashdiff
75
+
76
+ PLATFORMS
77
+ ruby
78
+
79
+ DEPENDENCIES
80
+ bundler (~> 2.0)
81
+ debase
82
+ equivalent-xml (~> 0.6)
83
+ pry-byebug
84
+ rake (~> 10.0)
85
+ relaton-itu!
86
+ rspec (~> 3.0)
87
+ ruby-debug-ide
88
+ simplecov
89
+ vcr!
90
+ webmock
91
+
92
+ BUNDLED WITH
93
+ 2.0.1
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2019 Andrei Kislichenko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,78 @@
1
+ = RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem model
2
+
3
+ image:https://img.shields.io/gem/v/relaton-itu.svg["Gem Version", link="https://rubygems.org/gems/relaton-itu"]
4
+ image:https://travis-ci.com/metanorma/relaton_bib.svg?branch=master["Build Status", link="https://travis-ci.com/metanorma/relaton_bib"]
5
+ image:https://ci.appveyor.com/api/projects/status/fd39m2762jo8ve04?svg=true["Appveyor Build Status", link="https://ci.appveyor.com/project/ribose/relaton_bib"]
6
+
7
+ RelatonItu is a Ruby gem that implements the https://github.com/metanorma/metanorma-model-iso#iso-bibliographic-item[IsoBibliographicItem model].
8
+
9
+ == Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ [source,ruby]
14
+ ----
15
+ gem 'relaton-itu'
16
+ ----
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install relaton-itu
25
+
26
+ == Usage
27
+
28
+ === Search for a standard using keywords
29
+
30
+ [source,ruby]
31
+ ----
32
+ require 'relaton_itu'
33
+
34
+ hit_collection = RelatonItu::ItuBibliography.search("ITU-T L.163")
35
+ => [<RelatonItu::Hit:0x007f92b5d693c8 @text="ITU-T L.163" @fetched="false" @fullIdentifier="" @title="ITU-T L.163 (11/2018)">,
36
+ <RelatonItu::Hit:0x007f92b5d69350 @text="ITU-T L.163" @fetched="false" @fullIdentifier="" @title="ITU-T Z.161 (10/2018)">]
37
+ ...
38
+
39
+ item = hit_collection[1].fetch
40
+ => #<IsoBibItem::IsoBibliographicItem:0x007f92b5c723c0
41
+ ...
42
+ ----
43
+
44
+ === XML serialization
45
+ [source,ruby]
46
+ ----
47
+ item.to_xml
48
+ => "<bibitem type=\"international-standard\" id=\"ITU-TZ.161(10/2018)\">
49
+ <fetched>2019-03-01</fetched>
50
+ <title format=\"text/plain\" language=\"en\" script=\"Latn\">
51
+ ITU-T Z.161 (10/2018): Testing and Test Control Notation version 3: TTCN-3 core language
52
+ </title>
53
+ ...
54
+ </bibitem>"
55
+ ----
56
+
57
+ === Get code, and year
58
+ [source,ruby]
59
+ ----
60
+ RelatonItu::ItuBibliography.get("ITU-T L.163", "2018", {})
61
+ fetching ITU-T L.163...
62
+ => #<IsoBibItem::IsoBibliographicItem:0x007f92b5d8bc20
63
+ ...
64
+ ----
65
+
66
+ == Development
67
+
68
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
69
+
70
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to https://rubygems.org[rubygems.org].
71
+
72
+ == Contributing
73
+
74
+ Bug reports and pull requests are welcome on GitHub at https://github.com/metanorma/relaton-itu.
75
+
76
+ == License
77
+
78
+ The gem is available as open source under the terms of the https://opensource.org/licenses/MIT[MIT License].
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,28 @@
1
+ version: '{build}'
2
+
3
+ environment:
4
+ matrix:
5
+ - RUBY_VERSION: 25
6
+ - RUBY_VERSION: 24
7
+ - RUBY_VERSION: _trunk
8
+
9
+ matrix:
10
+ allow_failures:
11
+ - RUBY_VERSION: _trunk
12
+
13
+ install:
14
+ - ps: . { iwr -useb https://raw.githubusercontent.com/metanorma/metanorma-build-scripts/master/appveyor.ps1 } | iex
15
+ - refreshenv
16
+
17
+ build_script:
18
+ - set PATH=C:\Ruby%RUBY_VERSION%\bin;%PATH%
19
+ - bundle update
20
+ - bundle install
21
+
22
+ before_test:
23
+ - ruby -v
24
+ - gem -v
25
+ - bundle -v
26
+
27
+ test_script:
28
+ - bundle exec rake
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "relaton_itu"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,40 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "relaton_itu/version"
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "relaton-itu"
8
+ spec.version = RelatonItu::VERSION
9
+ spec.authors = ["Ribose Inc."]
10
+ spec.email = ["open.source@ribose.com"]
11
+
12
+ spec.summary = "RelatonItu: retrieve ITU Standards for bibliographic use "\
13
+ "using the BibliographicItem model"
14
+ spec.description = "RelatonItu: retrieve ITU Standards for bibliographic use "\
15
+ "using the BibliographicItem model"
16
+ spec.homepage = "https://github.com/metanorma/relaton-itu"
17
+ spec.license = "MIT"
18
+
19
+ # Specify which files should be added to the gem when it is released.
20
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
21
+ spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
22
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
23
+ end
24
+ spec.bindir = "exe"
25
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
26
+ spec.require_paths = ["lib"]
27
+
28
+ spec.add_development_dependency "bundler", "~> 2.0"
29
+ spec.add_development_dependency "equivalent-xml", "~> 0.6"
30
+ spec.add_development_dependency "pry-byebug"
31
+ spec.add_development_dependency "rake", "~> 10.0"
32
+ spec.add_development_dependency "rspec", "~> 3.0"
33
+ spec.add_development_dependency "simplecov"
34
+ spec.add_development_dependency "vcr"
35
+ spec.add_development_dependency "webmock"
36
+ spec.add_development_dependency "ruby-debug-ide"
37
+ spec.add_development_dependency "debase"
38
+
39
+ spec.add_dependency "iso-bib-item", "~> 0.4.2"
40
+ end
@@ -0,0 +1,23 @@
1
+ require "relaton/processor"
2
+
3
module Relaton
  module RelatonItu
    # Relaton plug-in that exposes the ITU bibliography to a host
    # application through the Relaton::Processor interface.
    class Processor < Relaton::Processor
      # Sets the attributes that identify this processor.
      # NOTE(review): presumably these are read by the Relaton registry to
      # route "ITU ..." references here -- confirm against Relaton::Registry.
      def initialize
        @short = :relaton_itu
        @prefix = "ITU"
        @defaultprefix = %r{^(ITU)}
        @idtype = "ITU"
      end

      # Looks up an ITU document by delegating to
      # ::RelatonItu::ItuBibliography.get.
      # @param code [String] document identifier, e.g. "ITU-T L.163"
      # @param date [String, nil] publication year
      # @param opts [Hash] options forwarded unchanged
      # @return [Object, nil] whatever ItuBibliography.get returns
      def get(code, date, opts)
        ::RelatonItu::ItuBibliography.get(code, date, opts)
      end

      # Parses a bibliographic item from its XML serialisation.
      # @param xml [String]
      # @return [Object] result of IsoBibItem::XMLParser.from_xml
      def from_xml(xml)
        IsoBibItem::XMLParser.from_xml xml
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require "relaton_itu/version"
2
+ require 'relaton_itu/itu_bibliography'
3
+
4
# Plug the ITU processor into Relaton, but only when the host application
# has already loaded Relaton; otherwise the gem works stand-alone.
if defined?(Relaton)
  require_relative "relaton/processor"
  Relaton::Registry.instance.register(Relaton::RelatonItu::Processor)
end

# Top-level namespace of the relaton-itu gem.
module RelatonItu
  # Base class for errors raised by this gem.
  class Error < StandardError
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module RelatonItu
  # One entry of a search result. It wraps the raw hit data and lazily
  # resolves it into a full bibliographic item on demand.
  class Hit
    # @return [RelatonItu::HitCollection, nil] collection this hit belongs to
    attr_reader :hit_collection

    # @return [Hash] raw hit data; this class reads the :code key
    attr_reader :hit

    # @param hit [Hash] raw hit data
    # @param hit_collection [RelatonItu::HitCollection, nil] owning collection
    def initialize(hit, hit_collection = nil)
      @hit_collection = hit_collection
      @hit = hit
    end

    # Resolves the hit through Scrapper.parse_page. The result is memoised,
    # so the page is parsed at most once per hit.
    # @return [Object] whatever Scrapper.parse_page returns
    def fetch
      @fetch ||= Scrapper.parse_page @hit
    end

    # @return [String] same text as #inspect
    def to_s
      inspect
    end

    # Short description showing the query text, whether the hit has been
    # fetched, the fetched item's short reference and the hit's code.
    # @return [String]
    def inspect
      address = format('%#.14x', object_id << 1)
      query = @hit_collection&.text
      fetched = !@fetch.nil?
      identifier = @fetch&.shortref(nil)
      "<#{self.class}:#{address} @text=\"#{query}\" @fetched=\"#{fetched}\" " \
        "@fullIdentifier=\"#{identifier}\" @title=\"#{@hit[:code]}\">"
    end

    # Serialises the fetched item to XML. Fetches the item first if needed.
    # @param opts [Hash] options forwarded to the item's to_xml
    # @return [String] XML of the document root
    def to_xml(opts = {})
      document = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
        fetch.to_xml xml, opts
      end
      document.doc.root.to_xml
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'relaton_itu/hit'
4
+ require "addressable/uri"
5
+ require 'net/http'
6
+
7
require 'json'

module RelatonItu
  # Array of Hit objects produced by one query against the ITU search
  # service. The search request is sent when the collection is created.
  class HitCollection < Array
    DOMAIN = 'https://www.itu.int'.freeze

    # Endpoint that receives the JSON search request.
    SEARCH_URL = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/Search".freeze

    # @return [TrueClass, FalseClass] true once #fetch has completed
    attr_reader :fetched

    # @return [String] the reference that was searched for
    attr_reader :text

    # @return [String, nil] the year passed by the caller (stored only;
    #   it is not sent to the search service)
    attr_reader :year

    # Sends the search request and fills the collection with one Hit per
    # entry of the response's "results" list.
    # @param ref_nbr [String] reference to search for, e.g. "ITU-T L.163"
    # @param year [String, nil]
    def initialize(ref_nbr, year = nil)
      @text = ref_nbr
      @year = year
      @fetched = false
      # The endpoint expects the parameters JSON-encoded a second time under
      # the "json" key, hence the nested to_json.
      payload = { json: search_params(ref_nbr).to_json }
      resp = Net::HTTP.post(URI(SEARCH_URL), payload.to_json,
                            'Content-Type' => 'application/json')
      doc = JSON.parse resp.body
      hits = doc['results'].map do |entry|
        Hit.new({ code: entry['Media']['Name'],
                  title: entry['Title'],
                  url: entry['Redirection'] }, self)
      end
      concat hits
    end

    # Fetches every hit through a pool of four workers.
    # @return [RelatonItu::HitCollection] self
    def fetch
      workers = WorkersPool.new 4
      workers.worker(&:fetch)
      each do |hit|
        workers << hit
      end
      workers.end
      workers.result # blocks until all workers have finished
      @fetched = true
      self
    end

    # @return [String] same text as #inspect
    def to_s
      inspect
    end

    # @return [String]
    def inspect
      "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
    end

    private

    # Builds the parameter set for the search request.
    # @param ref_nbr [String] text placed in the "Input" field
    # @return [Hash]
    def search_params(ref_nbr)
      {
        "Input" => ref_nbr,
        "Start" => 0,
        "Rows" => 10,
        "SortBy" => "RELEVANCE",
        "ExactPhrase" => false,
        "CollectionName" => "General",
        "CollectionGroup" => "Recommendations",
        "Sector" => "t",
        "Criterias" => [{
          "Name" => "Search in",
          "Criterias" => [
            { "Selected" => false, "Value" => "", "Label" => "Name",
              "Target" => "/name_s", "TypeName" => "CHECKBOX",
              "GetCriteriaType" => 0 },
            { "Selected" => false, "Value" => "",
              "Label" => "Short description",
              "Target" => "/short_description_s", "TypeName" => "CHECKBOX",
              "GetCriteriaType" => 0 },
            { "Selected" => false, "Value" => "", "Label" => "File content",
              "Target" => "/file", "TypeName" => "CHECKBOX",
              "GetCriteriaType" => 0 },
          ],
          "ShowCheckbox" => true,
          "Selected" => false,
        }],
        "Topics" => "",
        "ClientData" => { "ip" => "" },
        "Language" => "en",
        "IP" => "",
        "SearchType" => "All",
      }
    end
  end
end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ # require 'isobib/iso_bibliographic_item'
4
+ require 'relaton_itu/scrapper'
5
+ require 'relaton_itu/hit_collection'
6
+ require "date"
7
+
8
module RelatonItu
  # Class-level API for searching ITU documents and fetching a single
  # bibliographic item.
  class ItuBibliography
    class << self
      # Runs a search. Any StandardError raised while building the
      # collection is swallowed: a warning is printed and an empty Array is
      # returned instead.
      # @param text [String] reference to search for
      # @param year [String, nil]
      # @return [RelatonItu::HitCollection, Array]
      def search(text, year = nil)
        HitCollection.new text, year
      rescue
        warn "Could not access http://www.itu.int"
        []
      end

      # Finds the bibliographic item for a reference.
      # When no year is given and the code looks like "ITU-T L.163 (11/2018)",
      # the trailing "(mm/yyyy)" is split off and its year is used.
      # @param code [String] document code, e.g. "ITU-T L.163"
      # @param year [String, nil] publication year
      # @param opts [Hash] :all_parts and :keep_year are recognised
      # @return [Object, nil] the fetched item, or nil when nothing matched
      def get(code, year = nil, opts = {})
        if year.nil?
          dated = /^(?<code1>[^\s]+\s[^\s]+)\s\(\d{2}\/(?<year1>\d+)\)$/.match(code)
          if dated
            code = dated[:code1]
            year = dated[:year1]
          end
        end

        code += '-1' if opts[:all_parts]
        item = itubib_get1(code, year, opts)
        return nil if item.nil?

        item.to_most_recent_reference if !year && !opts[:keep_year]
        item.to_all_parts if opts[:all_parts]
        item
      end

      private

      # Prints diagnostics for a failed lookup.
      # @param code [String]
      # @param year [String, nil]
      # @param missed_years [Array] years that were found but did not match
      # @return [nil]
      def fetch_ref_err(code, year, missed_years)
        id = year ? "#{code}:#{year}" : code
        warn "WARNING: no match found online for #{id}. "\
          "The code must be exactly like it is on the standards website."
        unless missed_years.empty?
          warn "(There was no match for #{year}, though there were matches "\
            "found for #{missed_years.join(', ')}.)"
        end
        hint =
          if /\d-\d/ =~ code
            "The provided document part may not exist, or the document "\
              "may no longer be published in parts."
          else
            "If you wanted to cite all document parts for the reference, "\
              "use \"#{code} (all parts)\".\nIf the document is not a standard, "\
              "use its document type abbreviation (TS, TR, PAS, Guide)."
          end
        warn hint
        nil
      end

      # Fetches the given hits concurrently and returns the fetched items in
      # the same order as the hits.
      # @param s [Array<RelatonItu::Hit>]
      # @param n [Integer] number of workers
      # @return [Array]
      def fetch_pages(s, n)
        pool = WorkersPool.new n
        pool.worker { |job| { i: job[:i], hit: job[:hit].fetch } }
        s.each_with_index { |hit, position| pool << { i: position, hit: hit } }
        pool.end
        pool.result.sort_by { |done| done[:i] }.map { |done| done[:hit] }
      end

      # Searches for the code and keeps the hits whose leading
      # "ITU-T <id>" part equals it exactly.
      # @param code [String]
      # @return [Array<RelatonItu::Hit>]
      def search_filter(code)
        docidrx = %r{^ITU-T\s[^\s]+}
        warn "fetching #{code}..."
        search(code).select do |candidate|
          found = candidate.hit[:code]
          found && found.match(docidrx).to_s == code
        end
      end

      # Fetches the hits three at a time and returns the first item that
      # qualifies: any item when no year is requested, otherwise the first
      # one with a "published" date in that year. When nothing qualifies,
      # the published years that were seen are returned for error reporting.
      # @param result [Array<RelatonItu::Hit>]
      # @param year [String, nil]
      # @return [Hash] { ret: item } on success, { years: [...] } otherwise
      def isobib_results_filter(result, year)
        missed_years = []
        result.each_slice(3) do |batch|
          fetch_pages(batch, 3).each do |item|
            return { ret: item } unless year

            item.dates.each do |date|
              next unless date.type == "published"
              return { ret: item } if year.to_i == date.on.year

              missed_years << date.on.year
            end
          end
        end
        { years: missed_years }
      end

      # Search, filter and fetch in one step. Reports a failed lookup
      # through fetch_ref_err.
      # @param code [String]
      # @param year [String, nil]
      # @param opts [Hash] currently unused
      # @return [Object, nil]
      def itubib_get1(code, year, opts)
        result = search_filter(code)
        return nil unless result

        found = isobib_results_filter(result, year)
        found[:ret] || fetch_ref_err(code, year, found[:years])
      end
    end
  end
end
@@ -0,0 +1,292 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'iso_bib_item'
4
+ require 'relaton_itu/hit'
5
+ require 'nokogiri'
6
+ require 'net/http'
7
+ require 'relaton_itu/workers_pool'
8
+
9
+ # Capybara.request_driver :poltergeist do |app|
10
+ # Capybara::Poltergeist::Driver.new app, js_errors: false
11
+ # end
12
+ # Capybara.default_driver = :poltergeist
13
+
14
module RelatonItu
  # Scraper that downloads an ITU document page and converts it into an
  # IsoBibItem::IsoBibliographicItem.
  # NOTE(review): the XPath expressions address elements by fragments of
  # their id ("Label5", "Label7", ...); this presumably mirrors the layout
  # of the ITU recommendation pages -- confirm against a live page.
  # rubocop:disable Metrics/ModuleLength
  module Scrapper
    DOMAIN = 'https://www.itu.int'

    # Maximum number of HTTP redirects get_page follows before giving up.
    MAX_REDIRECTS = 10

    # Document type abbreviations and their long names.
    TYPES = {
      'ISO' => 'international-standard',
      'TS' => 'technicalSpecification',
      'TR' => 'technicalReport',
      'PAS' => 'publiclyAvailableSpecification',
      'AWI' => 'approvedWorkItem',
      'CD' => 'committeeDraft',
      'FDIS' => 'finalDraftInternationalStandard',
      'NP' => 'newProposal',
      'DIS' => 'draftInternationalStandard',
      'WD' => 'workingDraft',
      'R' => 'recommendation',
      'Guide' => 'guide'
    }.freeze

    class << self
      # Downloads the page of a hit and builds the bibliographic item.
      # @param hit_data [Hash] hit data with :url, :code and :title
      # @return [IsoBibItem::IsoBibliographicItem]
      # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      def parse_page(hit_data)
        doc = get_page hit_data[:url]

        # Fetch edition.
        edition = doc.at("//table/tr/td/span[contains(@id, 'Label8')]/b").text

        IsoBibItem::IsoBibliographicItem.new(
          docid: fetch_docid(hit_data[:code]),
          edition: edition,
          language: ['en'],
          script: ['Latn'],
          titles: fetch_titles(hit_data),
          type: fetch_type(doc),
          docstatus: fetch_status(doc),
          ics: [], # ICS codes are not scraped; an empty list is passed.
          dates: fetch_dates(doc),
          contributors: fetch_contributors(hit_data[:code]),
          workgroup: fetch_workgroup(doc),
          abstract: fetch_abstract(doc),
          copyright: fetch_copyright(hit_data[:code], doc),
          link: fetch_link(doc, hit_data[:url]),
          relations: fetch_relations(doc)
        )
      end
      # rubocop:enable Metrics/AbcSize, Metrics/MethodLength

      private

      # Fetch abstracts. The page only carries a link (inside an onclick
      # handler) to the abstract, which is downloaded separately.
      # @param doc [Nokogiri::HTML::Document]
      # @return [Array<Hash>] empty when the page has no abstract link
      def fetch_abstract(doc)
        abstract_url = doc.at('//table/tr/td/span[contains(@id, "lbl_dms")]/div')
        return [] unless abstract_url

        url = abstract_url[:onclick].match(/https?[^']+/).to_s
        d = Nokogiri::HTML Net::HTTP.get(URI(url))
        # Drop line breaks, collapse runs of whitespace and remove
        # non-breaking spaces.
        abstract_content = d.css('p.MsoNormal').text.gsub(/\r\n/, '')
          .gsub(/\s{2,}/, ' ').gsub(/\u00a0/, '')

        [{
          content: abstract_content,
          language: 'en',
          script: 'Latn'
        }]
      end

      # Get page, following HTTP 301/302/303 redirects. The Location header
      # is resolved against the current URI, so relative redirects work too.
      # @param url [String] page's URL
      # @return [Nokogiri::HTML::Document]
      # @raise [RuntimeError] after more than MAX_REDIRECTS redirects
      def get_page(url)
        uri = URI url
        resp = Net::HTTP.get_response(uri)
        redirects = 0
        while %w[301 302 303].include?(resp.code)
          redirects += 1
          if redirects > MAX_REDIRECTS
            raise "Too many redirects while fetching #{url}"
          end

          uri = URI.join(uri.to_s, resp['location'])
          resp = Net::HTTP.get_response(uri)
        end
        Nokogiri::HTML(resp.body)
      end

      # Fetch docid. The project number is the first whitespace-delimited
      # token after the first space of the code.
      # @param code [String] e.g. "ITU-T L.163 (11/2018)"
      # @return [Hash]
      def fetch_docid(code)
        m = code.match(/(?<=\s)(?<project>[^\s]+)-?(?<part>(?<=-)\d+|)-?(?<subpart>(?<=-)\d+|)/)
        {
          project_number: m[:project],
          part_number: m[:part],
          subpart_number: m[:subpart],
          prefix: nil,
          type: 'ITU',
          id: code
        }
      end

      # Fetch status. "In force" maps to Published (60/60); every other
      # value maps to Withdrawal (95/99).
      # @param doc [Nokogiri::HTML::Document]
      # @return [Hash]
      def fetch_status(doc)
        s = doc.at("//table/tr/td/span[contains(@id, 'Label7')]").text
        if s == 'In force'
          status = 'Published'
          stage = '60'
          substage = '60'
        else
          status = 'Withdrawal'
          stage = '95'
          substage = '99'
        end
        { status: status, stage: stage, substage: substage }
      end

      # Fetch workgroup. The committee number is the first run of digits in
      # the group name, or nil when the name has none.
      # @param doc [Nokogiri::HTML::Document]
      # @return [Hash]
      def fetch_workgroup(doc)
        wg = doc.at('//table/tr/td/span[contains(@id, "Label8")]/a').text
        { name: 'International Telecommunication Union',
          abbreviation: 'ITU',
          url: 'www.itu.int',
          technical_committee: {
            name: wg,
            type: 'technicalCommittee',
            number: wg.match(/\d+/)&.to_s&.to_i
          } }
      end

      # Fetch relations from the rows of the "tab_sup" table, skipping its
      # first two rows.
      # @param doc [Nokogiri::HTML::Document]
      # @return [Array<Hash>]
      # rubocop:disable Metrics/MethodLength
      def fetch_relations(doc)
        doc.xpath('//div[contains(@id, "tab_sup")]//table/tr[position()>2]').map do |r|
          r_type = r.at('./td/span[contains(@id, "Label4")]/nobr').text.downcase
          type = case r_type
                 when 'in force' then 'published'
                 else r_type
                 end
          ref = r.at('./td/span[contains(@id, "title_e")]/nobr/a')
          # A leading "." in the href is replaced to form an absolute URL.
          url = DOMAIN + ref[:href].sub(/^\./, '/ITU-T/recommendations')
          { type: type, identifier: ref.text, url: url }
        end
      end
      # rubocop:enable Metrics/MethodLength

      # Fetch type. Every document is reported as an international standard;
      # the page is not inspected.
      # @param _doc [Nokogiri::HTML::Document] unused
      # @return [String]
      def fetch_type(_doc)
        'international-standard'
      end

      # Fetch titles by splitting the hit's title on " - " into intro, main
      # and part components.
      # @param hit_data [Hash] hit data with :title
      # @return [Array<Hash>]
      def fetch_titles(hit_data)
        titles = hit_data[:title].split ' - '
        case titles.size
        when 0
          intro, main, part = nil, "", nil
        when 1
          intro, main, part = nil, titles[0], nil
        when 2
          if /^(Part|Partie) \d+:/ =~ titles[1]
            intro, main, part = nil, titles[0], titles[1]
          else
            intro, main, part = titles[0], titles[1], nil
          end
        when 3
          intro, main, part = titles[0], titles[1], titles[2]
        else
          intro, main, part = titles[0], titles[1], titles[2..-1]&.join(" -- ")
        end
        [{
          title_intro: intro,
          title_main: main,
          title_part: part,
          language: 'en',
          script: 'Latn'
        }]
      end

      # Fetch dates
      # @param doc [Nokogiri::HTML::Document]
      # @return [Array<Hash>] empty when the page shows no date
      def fetch_dates(doc)
        dates = []
        publish_date = doc.at("//table/tr/td/span[contains(@id, 'Label5')]").text
        unless publish_date.empty?
          dates << { type: 'published', on: publish_date }
        end
        dates
      end

      # Fetch contributors. The abbreviation is the code with everything
      # from the "-X " sector suffix onwards removed. Name and URL are only
      # known for "ITU" and are nil otherwise.
      # @param code [String]
      # @return [Array<Hash>]
      def fetch_contributors(code)
        abbrev = code.sub(/-\w\s.*/, '')
        case abbrev
        when 'ITU'
          name = 'International Telecommunication Union'
          url = 'www.itu.int'
        end
        [{ entity: { name: name, url: url, abbreviation: abbrev }, roles: ['publisher'] }]
      end

      # Fetch links. The source URL is always included; the second link is
      # added only when the page provides it.
      # @param doc [Nokogiri::HTML::Document]
      # @param url [String]
      # @return [Array<Hash>]
      def fetch_link(doc, url)
        links = [{ type: 'src', content: url }]
        obp_elms = doc.at('//table/tr/td/span[contains(@id, "Label4")]/a')
        links << { type: 'obp', content: DOMAIN + obp_elms[:href] } if obp_elms
        links
      end

      # Fetch copyright. The owner abbreviation is the part of the code
      # before the first "-"; the start date is the text of the same page
      # element that fetch_dates reads.
      # @param code [String]
      # @param doc [Nokogiri::HTML::Document]
      # @return [Hash]
      def fetch_copyright(code, doc)
        abbreviation = code.match(/^[^-]+/).to_s
        case abbreviation
        when 'ITU'
          name = 'International Telecommunication Union'
          url = 'www.itu.int'
        end
        from = doc.at("//table/tr/td/span[contains(@id, 'Label5')]").text
        { owner: { name: name, abbreviation: abbreviation, url: url }, from: from }
      end
    end
  end
  # rubocop:enable Metrics/ModuleLength
end
@@ -0,0 +1,3 @@
1
# Namespace of the relaton-itu gem.
module RelatonItu
  # Version of the gem, as a frozen string.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
# Workers pool: a fixed number of threads that take items from a bounded
# queue, run a block on each item and collect the block's return values.
#
# Usage: call #worker with the block, push items with #<<, call #end once
# everything has been pushed, then read #result.
class WorkersPool
  attr_accessor :nb_hits

  # @param num_workers [Integer] number of threads; values below 2 are
  #   raised to 2
  def initialize(num_workers = 2)
    @num_workers = num_workers < 2 ? 2 : num_workers
    # Size the queue from the clamped count: SizedQueue rejects a
    # non-positive size, so the raw argument must not be used here.
    @queue = SizedQueue.new(@num_workers * 2)
    @result = []
    @threads = []
    @nb_hits = 0
  end

  # Starts the worker threads. Each thread pops items until it receives the
  # :END marker. Without a block the items are consumed and discarded.
  # @yieldparam item [Object] an item pushed with #<<
  # @return [Array<Thread>] the started threads
  def worker(&block)
    @threads = Array.new @num_workers do
      Thread.new do
        until (item = @queue.pop) == :END
          @result << block.call(item) if block
        end
      end
    end
  end

  # Waits for all worker threads to finish and returns the collected
  # values, in completion order. Returns the values collected so far
  # (an empty Array) when no worker was ever started.
  # @return [Array]
  def result
    @threads.each(&:join)
    @result
  end

  # Queues an item. Blocks while the queue is full. The symbol :END is
  # reserved as the stop marker and must not be pushed as an item.
  # @param item [Object]
  # @return [WorkersPool] self, so pushes can be chained
  def <<(item)
    @queue << item
    self
  end

  # Tells every worker to stop once the items queued before this call have
  # been taken: one :END marker is queued per worker.
  def end
    @num_workers.times { @queue << :END }
  end

  # @return [Integer] number of values collected so far
  def size
    @result.size
  end
end
metadata ADDED
@@ -0,0 +1,221 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: relaton-itu
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ribose Inc.
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-05-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: equivalent-xml
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.6'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pry-byebug
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: vcr
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: webmock
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: ruby-debug-ide
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: debase
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: iso-bib-item
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 0.4.2
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 0.4.2
167
+ description: 'RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem
168
+ model'
169
+ email:
170
+ - open.source@ribose.com
171
+ executables: []
172
+ extensions: []
173
+ extra_rdoc_files: []
174
+ files:
175
+ - ".gitignore"
176
+ - ".rspec"
177
+ - ".rubocop.yml"
178
+ - ".travis.yml"
179
+ - Gemfile
180
+ - Gemfile.lock
181
+ - LICENSE.txt
182
+ - README.adoc
183
+ - Rakefile
184
+ - appveyor.yml
185
+ - bin/console
186
+ - bin/setup
187
+ - itubib.gemspec
188
+ - lib/relaton/processor.rb
189
+ - lib/relaton_itu.rb
190
+ - lib/relaton_itu/hit.rb
191
+ - lib/relaton_itu/hit_collection.rb
192
+ - lib/relaton_itu/itu_bibliography.rb
193
+ - lib/relaton_itu/scrapper.rb
194
+ - lib/relaton_itu/version.rb
195
+ - lib/relaton_itu/workers_pool.rb
196
+ homepage: https://github.com/metanorma/relaton-itu
197
+ licenses:
198
+ - MIT
199
+ metadata: {}
200
+ post_install_message:
201
+ rdoc_options: []
202
+ require_paths:
203
+ - lib
204
+ required_ruby_version: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ required_rubygems_version: !ruby/object:Gem::Requirement
210
+ requirements:
211
+ - - ">="
212
+ - !ruby/object:Gem::Version
213
+ version: '0'
214
+ requirements: []
215
+ rubyforge_project:
216
+ rubygems_version: 2.6.12
217
+ signing_key:
218
+ specification_version: 4
219
+ summary: 'RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem
220
+ model'
221
+ test_files: []