relaton-core 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/relaton/core/data_fetcher.rb +35 -55
- data/lib/relaton/core/hit.rb +55 -0
- data/lib/relaton/core/hit_collection.rb +106 -0
- data/lib/relaton/core/processor.rb +48 -0
- data/lib/relaton/core/version.rb +1 -1
- data/lib/relaton/core/workers_pool.rb +45 -0
- data/lib/relaton/core.rb +5 -2
- metadata +15 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c165688976f3768ae55ababdba7bfdccd0e3fb3114c4793fe4497d74e88970da
|
4
|
+
data.tar.gz: 12f685b833874257bf04d87bf2a74375e2c72681a8861712b37c7cc52f513659
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3cceb7dc3f6dbd33cb08851b63b4d78239d61775e1e91fbd14330549693305b575b966d7b8bf4a66397317308638dde39b61e4eaa5ed9567ce3b0466bb618bbb
|
7
|
+
data.tar.gz: 6b5d18e51d50c2e1a5d457de051c32b5dcbb7768d809fb796d67efb8c811a0a5b2eed96874ae8f8d12961cbf347a90ca54e695a53c410444f013fda0f231b9a9
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Relaton::Core
|
2
2
|
class DataFetcher
|
3
|
-
attr_accessor :docs
|
3
|
+
# attr_accessor :docs
|
4
4
|
#
|
5
5
|
# Initialize fetcher
|
6
6
|
#
|
@@ -11,14 +11,9 @@ module Relaton::Core
|
|
11
11
|
@output = output
|
12
12
|
@format = format
|
13
13
|
@ext = format.sub "bibxml", "xml"
|
14
|
-
@files =
|
15
|
-
@docs = []
|
16
|
-
|
17
|
-
|
18
|
-
def index
|
19
|
-
@index ||= Relaton::Index.find_or_create self.class::INDEX_TYPE,
|
20
|
-
file: self.class::INDEX_FILE,
|
21
|
-
pubid_class: self.class.get_identifier_class
|
14
|
+
@files = Set.new
|
15
|
+
# @docs = []
|
16
|
+
@errors = Hash.new(true)
|
22
17
|
end
|
23
18
|
|
24
19
|
# API method for external service
|
@@ -32,58 +27,35 @@ module Relaton::Core
|
|
32
27
|
puts "Done in: #{(t2 - t1).round} sec."
|
33
28
|
end
|
34
29
|
|
35
|
-
def self.get_identifier_class
|
36
|
-
raise NotImplementedError, "#{self.class}#get_identifier_class method must be implemented"
|
37
|
-
end
|
38
|
-
|
39
30
|
# Abstract hook: concrete fetchers must override this method.
#
# @raise [NotImplementedError] always, in this base implementation
def fetch
  message = "#{self.class}#fetch method must be implemented"
  raise NotImplementedError, message
end
|
44
33
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
34
|
+
# Memoized GhIssue channel.
#
# On first call the channel is built from the arguments returned by
# #gh_issue_channel and a Log wrapping it (levels: [:error]) is stored in
# Relaton.logger_pool under the :gh_issue key. Later calls return the same
# channel object without touching the pool again.
#
# @return [Relaton::Logger::Channels::GhIssue]
def gh_issue
  unless defined?(@gh_issue)
    @gh_issue = Relaton::Logger::Channels::GhIssue.new(*gh_issue_channel)
    Relaton.logger_pool[:gh_issue] = Relaton::Logger::Log.new(@gh_issue, levels: [:error])
  end
  @gh_issue
end
|
51
41
|
|
52
|
-
|
53
|
-
|
54
|
-
def get_output_file(bib)
|
55
|
-
File.join @output, "#{bib.docidentifier.first.id.gsub(/[.\s-]+/, '-')}.#{@ext}"
|
42
|
+
# Abstract hook: concrete fetchers must override this method. Its return value
# is splatted into the GhIssue channel constructor by #gh_issue.
#
# @raise [NotImplementedError] always, in this base implementation
def gh_issue_channel
  message = "#{self.class}#gh_issue_channel method must be implemented"
  raise NotImplementedError, message
end
|
57
45
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
# @param [Hash] doc document data
|
62
|
-
# @param [Boolean] retired if true then document is retired
|
63
|
-
#
|
64
|
-
# @return [void]
|
65
|
-
#
|
66
|
-
def parse_and_save(doc)
|
67
|
-
bibitem = parse(doc)
|
68
|
-
save_bib(bibitem)
|
69
|
-
index_add_or_update(bibitem)
|
46
|
+
# Logs one "Failed to fetch <key>" message (via #log_error) for every entry of
# @errors whose value is truthy, then calls `create_issue` on #gh_issue.
#
# NOTE: the name is a misspelling of "report_errors"; it is kept so existing
# callers and overrides keep working. Prefer #report_errors in new code.
#
# @return [Object] whatever `gh_issue.create_issue` returns
def repot_errors
  @errors.select { |_, v| v }.each_key { |k| log_error "Failed to fetch #{k}" }
  gh_issue.create_issue
end

# Correctly spelled entry point. Delegates (rather than aliases) so that a
# subclass overriding #repot_errors is still honoured.
#
# @return [Object] whatever #repot_errors returns
def report_errors
  repot_errors
end
|
71
50
|
|
72
|
-
|
73
|
-
|
74
|
-
#
|
75
|
-
# @param [RelatonBib::BibliographicItem] bib bibitem
|
76
|
-
#
|
77
|
-
# @return [void]
|
78
|
-
#
|
79
|
-
def save_bib(bib)
|
80
|
-
file = get_output_file(bib)
|
81
|
-
File.write file, serialize(bib), encoding: "UTF-8"
|
51
|
+
# Abstract hook: concrete fetchers must override this method to record an
# error message (it is called by #repot_errors with "Failed to fetch ...").
#
# Raises NotImplementedError like every other abstract hook of this class;
# the previous NoMatchingPatternError belongs to `case/in` pattern matching
# and was unrelated to a missing override.
#
# @param _msg [String] error message (unused in the base implementation)
# @raise [NotImplementedError] always, in this base implementation
def log_error(_msg)
  raise NotImplementedError, "#{self.class}#log_error method must be implemented"
end
|
83
54
|
|
84
|
-
|
85
|
-
|
86
|
-
|
55
|
+
# @param docid [String] document ID
|
56
|
+
# @return [String] filename based on PubID identifier
|
57
|
+
def output_file(docid)
  # Lower-case the ID and collapse each run of dots, whitespace, slashes and
  # hyphens into a single "-" to get the file's base name.
  basename = docid.downcase.gsub(%r{[.\s/-]+}, "-")
  File.join(@output, "#{basename}.#{@ext}")
end
|
88
60
|
|
89
61
|
#
|
@@ -94,11 +66,19 @@ module Relaton::Core
|
|
94
66
|
# @return [String] serialized bibliographic item
|
95
67
|
#
|
96
68
|
def serialize(bib)
  # Dispatch on the configured format: "yaml" -> #to_yaml, "xml" -> #to_xml,
  # "bibxml" -> #to_bibxml.
  serializer = "to_#{@format}"
  send(serializer, bib)
end
|
71
|
+
|
72
|
+
# Abstract hook used by #serialize when the format is "yaml".
#
# @param bib [Object] bibliographic item (unused in the base implementation)
# @raise [NotImplementedError] always, in this base implementation
def to_yaml(bib)
  message = "#{self.class}#to_yaml method must be implemented"
  raise NotImplementedError, message
end
|
75
|
+
|
76
|
+
# Abstract hook used by #serialize when the format is "xml".
#
# @param bib [Object] bibliographic item (unused in the base implementation)
# @raise [NotImplementedError] always, in this base implementation
def to_xml(bib)
  message = "#{self.class}#to_xml method must be implemented"
  raise NotImplementedError, message
end
|
79
|
+
|
80
|
+
# Abstract hook used by #serialize when the format is "bibxml".
#
# @param bib [Object] bibliographic item (unused in the base implementation)
# @raise [NotImplementedError] always, in this base implementation
def to_bibxml(bib)
  message = "#{self.class}#to_bibxml method must be implemented"
  raise NotImplementedError, message
end
|
103
83
|
end
|
104
84
|
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require "weakref"
|
2
|
+
|
3
|
+
module Relaton
  module Core
    # One search result. Holds the raw hit data and a weak reference to the
    # collection that produced it. Subclasses implement #item.
    class Hit
      # @return [WeakRef, Object, nil] owning collection. #initialize wraps it
      #   in a WeakRef; the writer stores whatever it is given.
      attr_accessor :hit_collection

      # @return [Hash] raw hit data
      attr_reader :hit

      # @param hit [Hash] raw hit data; #inspect reads its :code entry
      # @param hit_collection [Object, nil] owning collection, stored weakly
      def initialize(hit, hit_collection = nil)
        @hit = hit
        @hit_collection = WeakRef.new hit_collection if hit_collection
      end

      # @return [String] same as #inspect
      def to_s
        inspect
      end

      # Human-readable summary of the hit.
      #
      # The `@text` field shows the owning collection's reference. It is read
      # through #collection_reference so that inspecting a hit never raises
      # when the collection exposes `ref` instead of `text`, or has already
      # been garbage-collected.
      #
      # NOTE(review): @fetch is not assigned anywhere in this class;
      # presumably subclasses set it. Left as-is so the output is unchanged.
      #
      # @return [String]
      def inspect
        "<#{self.class}:#{format('%<id>#.14x', id: object_id << 1)} " \
          "@text=\"#{collection_reference}\" " \
          "@fetched=\"#{!@fetch.nil?}\" " \
          "@fullIdentifier=\"#{@fetch&.shortref(nil)}\" " \
          "@title=\"#{@hit[:code]}\">"
      end

      # Abstract hook: subclasses return the bibliographic item for this hit.
      #
      # @raise [RuntimeError] always, in this base implementation
      def item
        raise "Not implemented"
      end

      # @param opts [Hash]
      # @option opts [Nokogiri::XML::Builder] :builder XML builder
      # @option opts [Boolean] :bibdata
      # @option opts [String, Symbol] :lang language
      # @return [String] XML
      # def to_xml(**opts)
      #   if opts[:builder]
      #     fetch.to_xml(**opts)
      #   else
      #     builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
      #       fetch.to_xml(**opts.merge(builder: xml))
      #     end
      #     builder.doc.root.to_xml
      #   end
      # end

      private

      # Reference text of the owning collection, for #inspect.
      #
      # Prefers `text` (so collections that already worked print exactly what
      # they printed before) and falls back to `ref`.
      #
      # @return [Object, nil] nil when there is no collection, it has been
      #   garbage-collected, or it exposes neither reader
      def collection_reference
        collection = @hit_collection
        return if collection.nil?
        return if collection.is_a?(WeakRef) && !collection.weakref_alive?

        reader = %i[text ref].find { |name| collection.respond_to?(name) }
        reader && collection.public_send(reader)
      rescue WeakRef::RefError
        # The collection was collected between the liveness check and the call.
        nil
      end
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require "forwardable"
|
2
|
+
require_relative "hit"
|
3
|
+
|
4
|
+
module Relaton
  module Core
    # Array-backed collection of hits found for one reference.
    class HitCollection
      extend Forwardable

      def_delegators :@array, :<<, :[], :first, :empty?, :any?, :size, :each, :each_slice, :reduce, :map

      # @return [TrueClass, FalseClass] true once #fetch has completed
      attr_reader :fetched

      # @return [String] reference the collection was created for
      attr_reader :ref

      # @return [String, nil] year of publication
      attr_reader :year

      #
      # @param ref [String, Pubid] reference to search
      # @param year [String, nil] year of publication
      #
      def initialize(ref, year = nil)
        @ref = ref
        @year = year
        @array = []
        @fetched = false
      end

      #
      # Calls `item` on every hit using a pool of 4 workers, waits for all of
      # them to finish and marks the collection as fetched.
      #
      # @return [self] self object
      #
      def fetch
        pool = WorkersPool.new 4
        pool.worker(&:item)
        each { |hit| pool << hit }
        pool.end
        pool.result # waits for the workers to finish; the values are not used
        @fetched = true
        self
      end

      #
      # Renders the collection as XML
      #
      # @param opts [Hash] options
      # @option opts [Nokogiri::XML::Builder] :builder XML builder
      # @option opts [Boolean] :bibdata render bibdata if true
      # @option opts [String, Symbol] :lang language
      #
      # @return [String] XML representation of the collection
      #
      # def to_xml(**opts)
      #   builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
      #     xml.documents do
      #       @array.each do |hit|
      #         hit.fetch
      #         hit.to_xml(**opts.merge(builder: xml))
      #       end
      #     end
      #   end
      #   builder.to_xml
      # end

      #
      # Keeps only the hits for which the block returns a truthy value.
      # The collection is modified in place.
      #
      # @param [Proc] &block proc to select hits
      #
      # @return [self] this collection
      #
      def select!(&block)
        tap { @array.select!(&block) }
      end

      #
      # Replaces the underlying storage with the result of reducing the
      # current hits, starting from `sum`.
      #
      # @param sum [Object] initial accumulator
      # @param [Proc] &block reducer, called with the accumulator and a hit
      #
      # @return [self] this collection
      #
      def reduce!(sum, &block)
        reduced = @array.reduce(sum, &block)
        @array = reduced
        self
      end

      #
      # Returns String representation of the collection
      #
      # @return [String] same as #inspect
      #
      def to_s
        inspect
      end

      #
      # Returns String representation of the collection
      #
      # @return [String] class name, object address, reference and fetched flag
      #
      def inspect
        address = format('%#.14x', object_id << 1)
        "<#{self.class}:#{address} @ref=#{@ref} @fetched=#{@fetched}>"
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Relaton
  module Core
    # Abstract base class. Every method except #threads raises, including
    # #initialize, so subclasses must define their own constructor (without
    # calling super) and override the methods they support.
    class Processor
      # @return [Symbol]
      attr_reader :short

      # @return [String]
      attr_reader :prefix, :idtype

      # @return [Regexp]
      attr_reader :defaultprefix

      # @return [Array<String>]
      attr_reader :datasets

      # @raise [RuntimeError] always, in this base implementation
      def initialize
        raise RuntimeError, "This is an abstract class!"
      end

      # @raise [RuntimeError] always, in this base implementation
      def get(_code, _date, _opts)
        raise RuntimeError, "This is an abstract class!"
      end

      # @raise [RuntimeError] always, in this base implementation
      def fetch_data(_source, _opts)
        raise RuntimeError, "This is an abstract class!"
      end

      # @raise [RuntimeError] always, in this base implementation
      def from_xml(_xml)
        raise RuntimeError, "This is an abstract class!"
      end

      # @raise [RuntimeError] always, in this base implementation
      def from_yaml(_hash)
        raise RuntimeError, "This is an abstract class!"
      end

      # @raise [RuntimeError] always, in this base implementation
      def grammar_hash
        raise RuntimeError, "This is an abstract class!"
      end

      # Returns the default number of workers. Should be overridden by child
      # classes if needed.
      #
      # @return [Integer] number of workers
      def threads
        10
      end
    end
  end
end
|
data/lib/relaton/core/version.rb
CHANGED
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Relaton
  module Core
    # Workers pool: a fixed number of threads that pop items from a bounded
    # queue, pass each one to the block given to #worker and collect the
    # block's return values.
    #
    # Typical use: call #worker, push items with #<<, call #end, read #result.
    class WorkersPool
      # @return [Integer] counter initialised to 0; this class only stores it
      attr_accessor :nb_hits

      # @param num_workers [Integer] requested number of threads; values
      #   below 2 are raised to 2
      def initialize(num_workers = 2)
        @num_workers = [num_workers, 2].max
        # Size the queue from the clamped value: SizedQueue rejects a
        # non-positive size, so using the raw argument made `new(0)` raise.
        @queue = SizedQueue.new(@num_workers * 2)
        @result = []
        @threads = [] # lets #result be called before #worker
        @nb_hits = 0
      end

      # Starts the worker threads. Each thread pops items until it receives
      # the :END marker pushed by #end. Without a block the items are consumed
      # and nothing is collected.
      #
      # @yieldparam item [Object] an item pushed with #<<
      # @return [Array<Thread>] the started threads
      def worker(&block)
        @threads = Array.new @num_workers do
          Thread.new do
            until (item = @queue.pop) == :END
              @result << yield(item) if block
            end
          end
        end
      end

      # Waits for every worker thread to finish and returns what the block
      # returned for each item. Call #end first, otherwise the workers keep
      # waiting for items and this call blocks.
      #
      # @return [Array] collected values, in the order workers produced them
      def result
        @threads.each(&:join)
        @result
      end

      # Enqueues an item; blocks while the queue is full.
      #
      # @param item [Object]
      # @return [self]
      def <<(item)
        @queue << item
        self
      end

      # Pushes one :END marker per worker so that every thread stops.
      #
      # @return [Integer] the number of workers
      def end
        @num_workers.times { @queue << :END }
      end

      # @return [Integer] number of values collected so far
      def size
        @result.size
      end
    end
  end
end
|
data/lib/relaton/core.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-04-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -39,47 +39,33 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '3.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: simplecov
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
48
|
-
type: :
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: 1.20.0
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: relaton-index
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: 0.2.16
|
62
|
-
type: :runtime
|
47
|
+
version: 0.21.2
|
48
|
+
type: :development
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
52
|
- - "~>"
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.2
|
54
|
+
version: 0.21.2
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
56
|
+
name: relaton-logger
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
59
|
- - "~>"
|
74
60
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
61
|
+
version: 0.2.0
|
76
62
|
type: :runtime
|
77
63
|
prerelease: false
|
78
64
|
version_requirements: !ruby/object:Gem::Requirement
|
79
65
|
requirements:
|
80
66
|
- - "~>"
|
81
67
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
68
|
+
version: 0.2.0
|
83
69
|
description: Library for importing and caching bibliographic references to technical
|
84
70
|
standards
|
85
71
|
email:
|
@@ -95,7 +81,11 @@ files:
|
|
95
81
|
- lib/relaton-core.rb
|
96
82
|
- lib/relaton/core.rb
|
97
83
|
- lib/relaton/core/data_fetcher.rb
|
84
|
+
- lib/relaton/core/hit.rb
|
85
|
+
- lib/relaton/core/hit_collection.rb
|
86
|
+
- lib/relaton/core/processor.rb
|
98
87
|
- lib/relaton/core/version.rb
|
88
|
+
- lib/relaton/core/workers_pool.rb
|
99
89
|
homepage: https://github.com/relaton/relaton-core
|
100
90
|
licenses:
|
101
91
|
- BSD-2-Clause
|
@@ -108,14 +98,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
108
98
|
requirements:
|
109
99
|
- - ">="
|
110
100
|
- !ruby/object:Gem::Version
|
111
|
-
version:
|
101
|
+
version: 3.1.0
|
112
102
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
103
|
requirements:
|
114
104
|
- - ">="
|
115
105
|
- !ruby/object:Gem::Version
|
116
106
|
version: '0'
|
117
107
|
requirements: []
|
118
|
-
rubygems_version: 3.
|
108
|
+
rubygems_version: 3.5.22
|
119
109
|
signing_key:
|
120
110
|
specification_version: 4
|
121
111
|
summary: Library for importing and caching bibliographic references to technical standards
|