isobib 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.rubocop.yml +4 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +66 -0
- data/README.adoc +172 -0
- data/Rakefile +6 -0
- data/bin/bundle +105 -0
- data/bin/byebug +29 -0
- data/bin/coderay +29 -0
- data/bin/console +14 -0
- data/bin/htmldiff +29 -0
- data/bin/httpclient +29 -0
- data/bin/ldiff +29 -0
- data/bin/nokogiri +29 -0
- data/bin/pry +29 -0
- data/bin/rake +29 -0
- data/bin/rspec +29 -0
- data/bin/setup +8 -0
- data/isobib.gemspec +40 -0
- data/lib/isobib/hit.rb +55 -0
- data/lib/isobib/hit_collection.rb +42 -0
- data/lib/isobib/hit_pages.rb +94 -0
- data/lib/isobib/iso_bibliography.rb +24 -0
- data/lib/isobib/scrapper.rb +378 -0
- data/lib/isobib/version.rb +5 -0
- data/lib/isobib/workers_pool.rb +41 -0
- data/lib/isobib.rb +4 -0
- metadata +174 -0
data/bin/pry
ADDED
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+#
+# This file was generated by Bundler.
+#
+# The application 'pry' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+
+require "pathname"
+ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+  Pathname.new(__FILE__).realpath)
+
+bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+if File.file?(bundle_binstub)
+  if File.read(bundle_binstub, 150) =~ /This file was generated by Bundler/
+    load(bundle_binstub)
+  else
+    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+  end
+end
+
+require "rubygems"
+require "bundler/setup"
+
+load Gem.bin_path("pry", "pry")
data/bin/rake
ADDED
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+#
+# This file was generated by Bundler.
+#
+# The application 'rake' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+
+require "pathname"
+ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+  Pathname.new(__FILE__).realpath)
+
+bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+if File.file?(bundle_binstub)
+  if File.read(bundle_binstub, 150) =~ /This file was generated by Bundler/
+    load(bundle_binstub)
+  else
+    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+  end
+end
+
+require "rubygems"
+require "bundler/setup"
+
+load Gem.bin_path("rake", "rake")
data/bin/rspec
ADDED
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+#
+# This file was generated by Bundler.
+#
+# The application 'rspec' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+
+require "pathname"
+ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
+  Pathname.new(__FILE__).realpath)
+
+bundle_binstub = File.expand_path("../bundle", __FILE__)
+
+if File.file?(bundle_binstub)
+  if File.read(bundle_binstub, 150) =~ /This file was generated by Bundler/
+    load(bundle_binstub)
+  else
+    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
+Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
+  end
+end
+
+require "rubygems"
+require "bundler/setup"
+
+load Gem.bin_path("rspec-core", "rspec")
data/bin/setup
ADDED
data/isobib.gemspec
ADDED
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+lib = File.expand_path('lib', __dir__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'isobib/version'
+
+Gem::Specification.new do |spec|
+  spec.name          = 'isobib'
+  spec.version       = Isobib::VERSION
+  spec.authors       = ['Ribose Inc.']
+  spec.email         = ['open.source@ribose.com']
+
+  spec.summary       = 'IsoBib: retrieve ISO Standards for bibliographic use '\
+                       'using the BibliographicItem model'
+  spec.description   = 'IsoBib: retrieve ISO Standards for bibliographic use '\
+                       'using the BibliographicItem model'
+
+  spec.homepage      = 'https://github.com/riboseinc/isobib'
+  spec.license       = 'MIT'
+
+  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
+    f.match(%r{^(test|spec|features)/})
+  end
+  spec.bindir        = 'exe'
+  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+  spec.require_paths = ['lib']
+
+  spec.add_development_dependency 'bundler', '~> 1.16'
+  spec.add_development_dependency 'pry-byebug'
+  spec.add_development_dependency 'rake', '~> 10.0'
+  spec.add_development_dependency 'rspec', '~> 3.0'
+  spec.add_development_dependency 'simplecov'
+
+  spec.add_dependency 'algoliasearch'
+  spec.add_dependency 'iso-bib-item'
+  # spec.add_dependency 'isoics'
+  # spec.add_dependency 'nokogiri'
+  # spec.add_dependency "capybara"
+  # spec.add_dependency "poltergeist"
+end
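
The gemspec declares algoliasearch and iso-bib-item as the only runtime dependencies. To pull this release into a project, the usual Bundler declaration is enough (the version constraint below is only illustrative):

    gem 'isobib', '~> 0.1.2'
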
data/lib/isobib/hit.rb
ADDED
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Isobib
+  # Hit.
+  class Hit
+    # @return [Isobib::HitCollection]
+    attr_reader :hit_collection
+
+    # @return [Array<Hash>]
+    attr_reader :hit
+
+    # @param hit [Hash]
+    # @param hit_collection [Isobib:HitCollection]
+    def initialize(hit, hit_collection = nil)
+      @hit = hit
+      @hit_collection = hit_collection
+    end
+
+    # Parse page.
+    # @return [Isobib::IsoBibliographicItem]
+    def fetch
+      @fetch ||= Scrapper.parse_page @hit
+    end
+
+    # @return [String]
+    def to_s
+      inspect
+    end
+
+    # @return [String]
+    def inspect
+      matched_words = @hit['_highlightResult']
+                      .inject([]) { |a, (_k, v)| a + v['matchedWords'] }.uniq
+
+      "<#{self.class}:#{format('%#.14x', object_id << 1)} "\
+      "@text=\"#{@hit_collection&.hit_pages&.text}\" "\
+      "@fullIdentifier=\"#{@fetch&.shortref}\" "\
+      "@matchedWords=#{matched_words} "\
+      "@category=\"#{@hit['category']}\" "\
+      "@title=\"#{@hit['title']}\">"
+    end
+
+    # @return [String]
+    def to_xml(builder = nil, opts = {})
+      if builder
+        fetch.to_xml builder, opts
+      else
+        builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
+          fetch.to_xml xml, opts
+        end
+        builder.doc.root.to_xml
+      end
+    end
+  end
+end
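
A Hit wraps one raw Algolia result and defers scraping until it is needed. A minimal sketch of how a single hit might be used, assuming a HitPages lookup as defined in lib/isobib/hit_pages.rb below (the query string is illustrative):

    hit  = Isobib::HitPages.new('ISO 19115')[0].first  # => Isobib::Hit
    hit.hit['title']                                   # raw Algolia hit data
    item = hit.fetch                                   # memoized result of Scrapper.parse_page
    xml  = hit.to_xml                                  # serialises the fetched item via Nokogiri::XML::Builder
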
data/lib/isobib/hit_collection.rb
ADDED
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+require 'isobib/hit'
+
+module Isobib
+  # Page of hit collection.
+  class HitCollection < Array
+    # @return [TrueClass, FalseClass]
+    attr_reader :fetched
+
+    # @return [Isobib::HitPages]
+    attr_reader :hit_pages
+
+    # @param hits [Array<Hash>]
+    def initialize(hits, hit_pages = nil)
+      concat(hits.map { |h| Hit.new(h, self) })
+      @fetched = false
+      @hit_pages = hit_pages
+    end
+
+    # @return [Isobib::HitCollection]
+    def fetch
+      workers = WorkersPool.new 4
+      workers.worker(&:fetch)
+      each do |hit|
+        workers << hit
+      end
+      workers.end
+      workers.result
+      @fetched = true
+      self
+    end
+
+    def to_s
+      inspect
+    end
+
+    def inspect
+      "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
+    end
+  end
+end
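
HitCollection is an Array of Hit objects for one result page; #fetch pushes every hit through a WorkersPool of four workers (lib/isobib/workers_pool.rb, not shown in this excerpt) so the detail pages are scraped concurrently. A sketch of the intended flow, with an illustrative query:

    page = Isobib::HitPages.new('ISO 19115')[0]  # => Isobib::HitCollection
    page.fetched                                 # => false
    page.fetch                                   # scrapes all hits on the worker pool
    page.fetched                                 # => true
    items = page.map(&:fetch)                    # cached IsoBibliographicItems
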
data/lib/isobib/hit_pages.rb
ADDED
@@ -0,0 +1,94 @@
+# frozen_string_literal: true
+
+require 'algoliasearch'
+require 'isobib/hit_collection'
+
+module Isobib
+  # Pages of hits.
+  class HitPages < Array
+    Algolia.init application_id: 'JCL49WV5AR',
+                 api_key: 'dd1b9e1ab383f4d4817d29cd5e96d3f0'
+
+    # @return [String]
+    attr_reader :text
+
+    # @param text [String]
+    def initialize(text)
+      @text = text
+      @index = Algolia::Index.new 'all_en'
+      resp = @index.search(text, facetFilters: ['category:standard'])
+      @nb_pages = resp['nbPages']
+      self << HitCollection.new(resp['hits'], self)
+    end
+
+    # @return [Isobib::HitCollection]
+    def last
+      collection(@nb_pages - 1)
+    end
+
+    # @param i [Integer]
+    # @return [Isobib::HitCollection]
+    def [](idx)
+      # collection i
+      return if idx + 1 > @nb_pages
+      collection idx
+      super
+    end
+
+    # @return [Array]
+    def map(&block)
+      m = []
+      @nb_pages.times do |n|
+        m << yield(self[n]) if block
+      end
+      m
+    end
+
+    def each(&block)
+      @nb_pages.times do |n|
+        yield self[n] if block
+      end
+    end
+
+    def to_s
+      inspect
+    end
+
+    def inspect
+      "<#{self.class}:#{format('%#.14x', object_id << 1)} @text=#{@text} "\
+      "@pages=#{@nb_pages}>"
+    end
+
+    # @return [Integer]
+    def size
+      @nb_pages
+    end
+
+    def to_xml
+      builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
+        xml.documents do
+          each do |page|
+            page.fetch
+            page.each { |hit| hit.to_xml xml }
+          end
+        end
+      end
+      builder.to_xml
+    end
+
+    private
+
+    # @param i [Integer]
+    # @return [Isobib::HitCollection]
+    def collection(idx)
+      return if idx + 1 > @nb_pages
+      while Array.instance_method(:size).bind(self).call < idx + 1
+        resp = @index.search(@text,
+                             facetFilters: ['category:standard'],
+                             page: idx)
+        self << HitCollection.new(resp['hits'], self)
+      end
+      Array.instance_method(:[]).bind(self).call idx
+    end
+  end
+end
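
HitPages issues the initial Algolia query eagerly and loads further result pages lazily: #[] and #last go through the private #collection helper, which queries @index with the requested page number only when that page has not been appended yet. A sketch, again with an illustrative query:

    pages = Isobib::HitPages.new('ISO 19115')
    pages.size    # nbPages reported by the first Algolia response
    pages[1]      # triggers @index.search(..., page: 1) and caches the resulting HitCollection
    pages.last    # equivalent to pages[pages.size - 1]
    pages.to_xml  # fetches every page and serialises all hits inside a <documents> element
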
data/lib/isobib/iso_bibliography.rb
ADDED
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+# require 'isobib/iso_bibliographic_item'
+require 'isobib/scrapper'
+require 'isobib/hit_pages'
+
+module Isobib
+  # Class methods for search ISO standards.
+  class IsoBibliography
+    class << self
+      # @param text [String]
+      # @return [Isobib::HitPages]
+      def search(text)
+        HitPages.new text
+      end
+
+      # @param text [String]
+      # @return [Array<IsoBibliographicItem>]
+      def search_and_fetch(text)
+        Scrapper.get(text)
+      end
+    end
+  end
+end
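
Putting the pieces together, IsoBibliography is the public entry point. A minimal end-to-end sketch, assuming lib/isobib.rb requires these files and using an illustrative query string:

    require 'isobib'

    pages = Isobib::IsoBibliography.search('ISO 19115')            # => Isobib::HitPages
    first = pages[0].fetch                                         # scrape the first result page
    items = first.map(&:fetch)                                     # => Array of IsoBibliographicItem
    all   = Isobib::IsoBibliography.search_and_fetch('ISO 19115')  # or let Scrapper.get drive the whole search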