bel_parser 1.0.0.alpha.16 → 1.0.0.alpha.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/bin/bel2_validator +11 -3
- data/bin/bel_script_reader +11 -3
- data/lib/bel_parser/expression/validator.rb +2 -2
- data/lib/bel_parser/resource/eager_reader.rb +41 -27
- data/lib/bel_parser/resource/file_resource.rb +21 -0
- data/lib/bel_parser/resource/file_resource_value.rb +24 -0
- data/lib/bel_parser/resource/lru_cache.rb +1 -1
- data/lib/bel_parser/resource/lru_reader.rb +14 -15
- data/lib/bel_parser/resource/resource_url_reader.rb +160 -0
- data/lib/bel_parser/script/validator.rb +4 -2
- metadata +4 -3
- data/lib/bel_parser/resource/http_cache.rb +0 -89
- data/lib/bel_parser/resource/resource_file_reader.rb +0 -135
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21d0de137c850359607608e5cdca2a4481934fd5
|
4
|
+
data.tar.gz: aebe4a51435a51ee91d410a2c5ff0753ec3d51d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51d92527013676307ee3307496f1282fe838a9f57b138e1d74d6dcdb49cd7061fd913c26111422f9e1fcad3c61dcbd1ead176996ec90e20acc2e272b49de710b
|
7
|
+
data.tar.gz: edf6a891d9d6d9ba08dcef2cf4f6090cba96b040b931c8ee7aa39296bc7a6a305b81b45c2e617254b9b293a92a0e90db80756327b2b6e90430b1c8b68caeb184
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.0.alpha.16
|
1
|
+
1.0.0.alpha.17
|
data/bin/bel2_validator
CHANGED
@@ -9,7 +9,6 @@ unless ARGV.first
|
|
9
9
|
USAGE
|
10
10
|
exit 1
|
11
11
|
end
|
12
|
-
namespaces = Hash[ARGV[1..-1].map { |ns| ns.split('=') }]
|
13
12
|
|
14
13
|
def syntax_results(results)
|
15
14
|
results.select do |res|
|
@@ -25,9 +24,18 @@ end
|
|
25
24
|
|
26
25
|
require 'bel_parser'
|
27
26
|
require 'bel_parser/expression/validator'
|
28
|
-
require 'bel_parser/resource/resource_file_reader'
|
27
|
+
require 'bel_parser/resource/resource_url_reader'
|
28
|
+
|
29
|
+
resource_reader = BELParser::Resource::ResourceURLReader.new(true)
|
30
|
+
namespaces =
|
31
|
+
Hash[
|
32
|
+
ARGV.map do |ns|
|
33
|
+
prefix, identifier = ns.split('=')
|
34
|
+
dataset = resource_reader.retrieve_resource(identifier)
|
35
|
+
dataset ? [prefix, dataset] : nil
|
36
|
+
end.compact
|
37
|
+
]
|
29
38
|
|
30
|
-
resource_reader = BELParser::Resource::ResourceFileReader.new
|
31
39
|
BELParser::Expression::Validator
|
32
40
|
.new(ARGV.first, namespaces, resource_reader)
|
33
41
|
.each($stdin) do |(line_number, line, ast, results)|
|
data/bin/bel_script_reader
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(
|
|
4
4
|
|
5
5
|
require 'optparse'
|
6
6
|
require 'bel_parser'
|
7
|
-
require 'bel_parser/resource/resource_file_reader'
|
7
|
+
require 'bel_parser/resource/resource_url_reader'
|
8
8
|
|
9
9
|
options = {
|
10
10
|
spec: BELParser::Language.specification(
|
@@ -37,6 +37,14 @@ OptionParser.new do |opts|
|
|
37
37
|
|
38
38
|
options[:spec] = BELParser::Language.specification(spec)
|
39
39
|
end
|
40
|
+
|
41
|
+
opts.on(
|
42
|
+
'-r',
|
43
|
+
'--[no-]reuse-resource-databases',
|
44
|
+
'Enable to reuse resource databases if they exist.') do |reuse|
|
45
|
+
|
46
|
+
options[:reuse] = reuse
|
47
|
+
end
|
40
48
|
end.parse!
|
41
49
|
|
42
50
|
file, spec = options.values_at(:file, :spec)
|
@@ -70,7 +78,7 @@ SYN_ERR = BELParser::Language::Syntax::SyntaxError
|
|
70
78
|
SYN_WARN = BELParser::Language::Syntax::SyntaxWarning
|
71
79
|
SEM_WARN = BELParser::Language::Semantics::SemanticsWarning
|
72
80
|
|
73
|
-
rr
|
81
|
+
rr = BELParser::Resource::ResourceURLReader.new(options[:reuse])
|
74
82
|
namespaces = Hash[
|
75
83
|
ARGV.map do |ns|
|
76
84
|
prefix, identifier = ns.split('=')
|
@@ -80,7 +88,7 @@ namespaces = Hash[
|
|
80
88
|
]
|
81
89
|
|
82
90
|
initial_state = {
|
83
|
-
resource_reader:
|
91
|
+
resource_reader: rr,
|
84
92
|
specification: spec,
|
85
93
|
namespace_definitions: namespaces
|
86
94
|
}
|
@@ -49,9 +49,9 @@ if __FILE__ == $PROGRAM_NAME
|
|
49
49
|
exit 1
|
50
50
|
end
|
51
51
|
|
52
|
-
require 'bel_parser/resource/resource_file_reader'
|
52
|
+
require 'bel_parser/resource/resource_url_reader'
|
53
53
|
|
54
|
-
resource_reader = BELParser::Resource::ResourceFileReader.new
|
54
|
+
resource_reader = BELParser::Resource::ResourceURLReader.new
|
55
55
|
namespaces =
|
56
56
|
Hash[
|
57
57
|
ARGV[1..-1]
|
@@ -15,61 +15,75 @@ module BELParser
|
|
15
15
|
@locks ||= Concurrent::Hash.new
|
16
16
|
end
|
17
17
|
|
18
|
+
def load_threads
|
19
|
+
@load_threads ||= Concurrent::Hash.new
|
20
|
+
end
|
21
|
+
|
22
|
+
def load_values(identifier)
|
23
|
+
lock = locks[identifier] ||= Mutex.new
|
24
|
+
if identifier.include?('taxonomy')
|
25
|
+
puts "load_all_values, try lock..."
|
26
|
+
end
|
27
|
+
if lock.try_lock
|
28
|
+
load_threads[identifier] = Thread.new do
|
29
|
+
value_hash = {
|
30
|
+
:name => {},
|
31
|
+
:identifier => {},
|
32
|
+
:title => {},
|
33
|
+
:synonyms => {}
|
34
|
+
}
|
35
|
+
retrieve_values_from_resource(identifier).each do |concept|
|
36
|
+
value_hash[:name][concept.name] = concept
|
37
|
+
value_hash[:identifier][concept.identifier] = concept
|
38
|
+
value_hash[:title][concept.title] = concept
|
39
|
+
concept.synonyms.each do |synonym|
|
40
|
+
value_hash[:synonyms][synonym] = concept
|
41
|
+
end
|
42
|
+
end
|
43
|
+
resources[identifier] = value_hash
|
44
|
+
lock.unlock
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
18
49
|
def retrieve_resource(resource_identifier)
|
19
50
|
if !resources.key?(resource_identifier)
|
20
51
|
load_all_values(resource_identifier)
|
21
52
|
end
|
22
|
-
super
|
23
53
|
end
|
24
54
|
|
25
55
|
def retrieve_value_from_resource(resource_identifier, value)
|
26
56
|
if !resources.key?(resource_identifier)
|
27
57
|
load_all_values(resource_identifier)
|
58
|
+
if load_threads.key?(resource_identifier)
|
59
|
+
load_thread = load_threads[resource_identifier]
|
60
|
+
load_thread.join unless load_thread == Thread.current
|
61
|
+
end
|
28
62
|
end
|
29
63
|
|
30
64
|
concepts = resources[resource_identifier]
|
31
|
-
return
|
65
|
+
return nil unless concepts
|
32
66
|
CACHE_KEYS.each do |key|
|
33
67
|
cached_concept = concepts[key].fetch(value, nil)
|
34
68
|
return cached_concept if cached_concept
|
35
69
|
end
|
36
|
-
|
37
70
|
nil
|
38
71
|
end
|
39
72
|
|
40
73
|
def retrieve_values_from_resource(resource_identifier)
|
41
74
|
if !resources.key?(resource_identifier)
|
42
75
|
load_all_values(resource_identifier)
|
76
|
+
if load_threads.key?(resource_identifier)
|
77
|
+
load_thread = load_threads[resource_identifier]
|
78
|
+
load_thread.join unless load_thread == Thread.current
|
79
|
+
end
|
43
80
|
end
|
44
81
|
|
45
82
|
concepts = resources[resource_identifier]
|
46
|
-
return
|
83
|
+
return nil unless concepts
|
47
84
|
concepts[:name].values
|
48
85
|
end
|
49
86
|
|
50
|
-
def load_all_values(identifier)
|
51
|
-
lock = locks[identifier] ||= Mutex.new
|
52
|
-
if lock.try_lock
|
53
|
-
Thread.new do
|
54
|
-
value_hash = {
|
55
|
-
:name => {},
|
56
|
-
:identifier => {},
|
57
|
-
:title => {},
|
58
|
-
:synonyms => {}
|
59
|
-
}
|
60
|
-
retrieve_values_from_resource(identifier).each do |concept|
|
61
|
-
value_hash[:name][concept.name] = concept
|
62
|
-
value_hash[:identifier][concept.identifier] = concept
|
63
|
-
value_hash[:title][concept.title] = concept
|
64
|
-
concept.synonyms.each do |synonym|
|
65
|
-
value_hash[:synonyms][synonym] = concept
|
66
|
-
end
|
67
|
-
end
|
68
|
-
resources[identifier] = value_hash
|
69
|
-
lock.unlock
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
87
|
end
|
74
88
|
end
|
75
89
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require_relative 'dataset'
|
2
|
+
|
3
|
+
# FileResource is a {Dataset} description obtained from a Annotation or
|
4
|
+
# Namespace file.
|
5
|
+
module BELParser
|
6
|
+
module Resource
|
7
|
+
class FileResource
|
8
|
+
include Dataset
|
9
|
+
|
10
|
+
attr_reader :identifier, :domain, :keyword, :name, :types
|
11
|
+
|
12
|
+
def initialize(identifier, domain, keyword, name, type)
|
13
|
+
@identifier = identifier.to_s
|
14
|
+
@domain = domain.to_s
|
15
|
+
@keyword = keyword.to_s
|
16
|
+
@name = name.to_s
|
17
|
+
@types = [type]
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative 'value'
|
2
|
+
|
3
|
+
# FileResourceValue is a {Value} obtained from a Annotation or Namespace
|
4
|
+
# file.
|
5
|
+
module BELParser
|
6
|
+
module Resource
|
7
|
+
class FileResourceValue
|
8
|
+
include Value
|
9
|
+
|
10
|
+
attr_reader :dataset, :name, :encodings
|
11
|
+
|
12
|
+
def initialize(dataset, name, encodings)
|
13
|
+
@dataset = dataset
|
14
|
+
@name = name
|
15
|
+
@encodings =
|
16
|
+
if dataset.annotation_resource? && !dataset.namespace_resource?
|
17
|
+
nil
|
18
|
+
else
|
19
|
+
encodings.chars.map(&:to_sym)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -3,7 +3,7 @@ require 'concurrent/hash'
|
|
3
3
|
module BELParser
|
4
4
|
module Resource
|
5
5
|
# LRUCache implements a least recently used cache. This implementation was
|
6
|
-
#
|
6
|
+
# adapted from https://github.com/SamSaffron/lru_redux.
|
7
7
|
class LRUCache
|
8
8
|
def initialize(max_size)
|
9
9
|
raise ArgumentError.new(:max_size) if max_size < 1
|
@@ -4,30 +4,29 @@ module BELParser
|
|
4
4
|
module Resource
|
5
5
|
module LRUReader
|
6
6
|
|
7
|
-
LRU_MAX_SIZE =
|
7
|
+
LRU_MAX_SIZE = 2000
|
8
8
|
private_constant :LRU_MAX_SIZE
|
9
9
|
|
10
|
-
def resources
|
11
|
-
@resources ||= Concurrent::Hash.new
|
12
|
-
end
|
13
|
-
|
14
10
|
def retrieve_resource(resource_identifier)
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
11
|
+
@resources ||= Hash.new { |hash, key| hash[key] = {} }
|
12
|
+
cached_dataset = @resources[resource_identifier][:dataset]
|
13
|
+
return cached_dataset if cached_dataset
|
14
|
+
|
15
|
+
resolved_dataset = super
|
16
|
+
@resources[resource_identifier][:dataset] = resolved_dataset
|
17
|
+
@resources[resource_identifier][:values] = LRUCache.new(LRU_MAX_SIZE)
|
18
|
+
resolved_dataset
|
19
19
|
end
|
20
20
|
|
21
21
|
def retrieve_value_from_resource(resource_identifier, value)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
concepts = resources[resource_identifier]
|
27
|
-
concepts.getset(value) { super }
|
22
|
+
retrieve_resource(resource_identifier)
|
23
|
+
@resources[resource_identifier][:values].getset(value) {
|
24
|
+
super
|
25
|
+
}
|
28
26
|
end
|
29
27
|
|
30
28
|
def retrieve_values_from_resource(resource_identifier)
|
29
|
+
retrieve_resource(resource_identifier)
|
31
30
|
super
|
32
31
|
end
|
33
32
|
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
require 'base64'
|
2
|
+
require 'digest'
|
3
|
+
require 'gdbm'
|
4
|
+
require 'net/http'
|
5
|
+
require 'tempfile'
|
6
|
+
require 'uri'
|
7
|
+
require_relative 'reader'
|
8
|
+
require_relative 'file_resource'
|
9
|
+
require_relative 'file_resource_value'
|
10
|
+
|
11
|
+
module BELParser
|
12
|
+
module Resource
|
13
|
+
# ResourceURLReader retrieves {Dataset datasets} and {Value values} from
|
14
|
+
# Annotation (i.e. belanno extension) and Namespace (i.e. belns extension)
|
15
|
+
# files. Value and encoding are stored in GDBM database files to reduce the
|
16
|
+
# runtime memory usage (22 resources loaded, totaling 100MB memory usage).
|
17
|
+
#
|
18
|
+
# Only supports resource identifiers with an HTTP or HTTPS scheme.
|
19
|
+
class ResourceURLReader
|
20
|
+
include Reader
|
21
|
+
|
22
|
+
DEFAULT_RESOURCE_VALUE_DELIMITER = '|'
|
23
|
+
private_constant :DEFAULT_RESOURCE_VALUE_DELIMITER
|
24
|
+
|
25
|
+
def initialize(reuse_database_files = false)
|
26
|
+
@resources = {}
|
27
|
+
@datasets = ::GDBM.new(_temporary_datasets_file)
|
28
|
+
@reuse = reuse_database_files
|
29
|
+
end
|
30
|
+
|
31
|
+
def retrieve_resource(resource_identifier)
|
32
|
+
read_resource(resource_identifier)[:dataset]
|
33
|
+
end
|
34
|
+
|
35
|
+
def retrieve_value_from_resource(resource_identifier, value)
|
36
|
+
resource = read_resource(resource_identifier)
|
37
|
+
encoding = resource[:values][value]
|
38
|
+
return nil unless encoding
|
39
|
+
FileResourceValue.new(resource[:dataset], value, encoding)
|
40
|
+
end
|
41
|
+
|
42
|
+
def retrieve_values_from_resource(resource_identifier)
|
43
|
+
resource = read_resource(resource_identifier)
|
44
|
+
dataset = resource[:dataset]
|
45
|
+
resource[:values].lazy.map do |value, encoding|
|
46
|
+
FileResourceValue.new(dataset, value, encoding)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
protected
|
51
|
+
|
52
|
+
def read_resource(url)
|
53
|
+
# return cached resource if present
|
54
|
+
resource = @resources[url]
|
55
|
+
return resource if resource
|
56
|
+
|
57
|
+
# read url
|
58
|
+
content = http_get(url)
|
59
|
+
unless content
|
60
|
+
# cache and return empty resource if not resolvable
|
61
|
+
return @resources[url] = empty_resource
|
62
|
+
end
|
63
|
+
|
64
|
+
@resources[url] = create_resource(url, content.each_line)
|
65
|
+
end
|
66
|
+
|
67
|
+
def create_resource(url, line_enum)
|
68
|
+
delimiter = DEFAULT_RESOURCE_VALUE_DELIMITER
|
69
|
+
dataset = @datasets[url]
|
70
|
+
value_database_file = _temporary_database_file(url)
|
71
|
+
values = ::GDBM.new(value_database_file)
|
72
|
+
|
73
|
+
if @reuse && dataset && values.size > 0
|
74
|
+
warn(
|
75
|
+
<<-MSG.gsub(/^ {14}/, '')
|
76
|
+
Warning - Reusing value database.
|
77
|
+
URL: #{url}
|
78
|
+
File: #{value_database_file}
|
79
|
+
MSG
|
80
|
+
)
|
81
|
+
return {
|
82
|
+
dataset: FileResource.new(url, *dataset.split('//')),
|
83
|
+
values: values
|
84
|
+
}
|
85
|
+
end
|
86
|
+
|
87
|
+
value_section = false
|
88
|
+
type, name, keyword, domain = nil
|
89
|
+
line_enum.each do |line|
|
90
|
+
line.strip!
|
91
|
+
case
|
92
|
+
when line =~ /^#{Regexp.escape('[AnnotationDefinition]')}/
|
93
|
+
type = Dataset::ANNOTATION
|
94
|
+
when line =~ /^#{Regexp.escape('[Namespace]')}/
|
95
|
+
type = Dataset::NAMESPACE
|
96
|
+
when line =~ /^NameString *= *(.*)$/
|
97
|
+
name = Regexp.last_match[1]
|
98
|
+
when line =~ /^Keyword *= *(.*)$/
|
99
|
+
keyword = Regexp.last_match[1]
|
100
|
+
when line =~ /^DomainString *= *(.*)$/
|
101
|
+
domain = Regexp.last_match[1]
|
102
|
+
when line =~ /^DelimiterString *=(.*)$/
|
103
|
+
delimiter = Regexp.last_match[1]
|
104
|
+
when line =~ /^#{Regexp.escape('[Values]')}/
|
105
|
+
dataset = FileResource.new(url, domain, keyword, name, type)
|
106
|
+
value_section = true
|
107
|
+
when value_section
|
108
|
+
value, encoding = line.strip.split(delimiter)
|
109
|
+
values[value.to_s] = encoding.to_s
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
resource = {
|
114
|
+
dataset: FileResource.new(url, domain, keyword, name, type),
|
115
|
+
values: values
|
116
|
+
}
|
117
|
+
@datasets[url] = [domain, keyword, name, type].join('//')
|
118
|
+
resource
|
119
|
+
end
|
120
|
+
|
121
|
+
def empty_resource
|
122
|
+
{ dataset: nil, values: {} }
|
123
|
+
end
|
124
|
+
|
125
|
+
def http_get(url)
|
126
|
+
begin
|
127
|
+
_get(URI.parse(url))
|
128
|
+
rescue URI::InvalidURIError, SocketError
|
129
|
+
return nil
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
private
|
134
|
+
|
135
|
+
def _get(url, &block)
|
136
|
+
Net::HTTP.start(url.host, url.port) do |http|
|
137
|
+
http.request(Net::HTTP::Get.new(url)) do |response|
|
138
|
+
return response.read_body
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
def _hash_url(url)
|
144
|
+
Base64.encode64(Digest::SHA1.digest(url)).delete("/=\n")
|
145
|
+
end
|
146
|
+
|
147
|
+
def _temporary_datasets_file
|
148
|
+
resource_directory = File.join(Dir.tmpdir, 'belresources')
|
149
|
+
FileUtils.mkdir_p(resource_directory)
|
150
|
+
File.join(resource_directory, 'datasets.gdbm')
|
151
|
+
end
|
152
|
+
|
153
|
+
def _temporary_database_file(url)
|
154
|
+
resource_directory = File.join(Dir.tmpdir, 'belresources')
|
155
|
+
FileUtils.mkdir_p(resource_directory)
|
156
|
+
File.join(resource_directory, "#{_hash_url(url)}.gdbm")
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
@@ -98,10 +98,12 @@ end
|
|
98
98
|
if __FILE__ == $PROGRAM_NAME
|
99
99
|
$LOAD_PATH.unshift(
|
100
100
|
File.join(File.expand_path(File.dirname(__FILE__)), '..', '..'))
|
101
|
+
|
101
102
|
require 'bel_parser/language'
|
102
|
-
require 'bel_parser/resource/resource_file_reader'
|
103
|
+
require 'bel_parser/resource/resource_url_reader'
|
104
|
+
|
103
105
|
initial_state = {
|
104
|
-
resource_reader: BELParser::Resource::ResourceFileReader.new,
|
106
|
+
resource_reader: BELParser::Resource::ResourceURLReader.new,
|
105
107
|
specification: BELParser::Language.specification(
|
106
108
|
BELParser::Language.latest_supported_version
|
107
109
|
)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bel_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0.alpha.16
|
4
|
+
version: 1.0.0.alpha.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anthony Bargnesi
|
@@ -304,12 +304,13 @@ files:
|
|
304
304
|
- lib/bel_parser/resource/dataset.rb
|
305
305
|
- lib/bel_parser/resource/eager_reader.rb
|
306
306
|
- lib/bel_parser/resource/eager_sparql_reader.rb
|
307
|
-
- lib/bel_parser/resource/http_cache.rb
|
307
|
+
- lib/bel_parser/resource/file_resource.rb
|
308
|
+
- lib/bel_parser/resource/file_resource_value.rb
|
308
309
|
- lib/bel_parser/resource/jena_tdb_reader.rb
|
309
310
|
- lib/bel_parser/resource/lru_cache.rb
|
310
311
|
- lib/bel_parser/resource/lru_reader.rb
|
311
312
|
- lib/bel_parser/resource/reader.rb
|
312
|
-
- lib/bel_parser/resource/resource_file_reader.rb
|
313
|
+
- lib/bel_parser/resource/resource_url_reader.rb
|
313
314
|
- lib/bel_parser/resource/sparql_reader.rb
|
314
315
|
- lib/bel_parser/resource/value.rb
|
315
316
|
- lib/bel_parser/script.rb
|
@@ -1,89 +0,0 @@
|
|
1
|
-
require 'base64'
|
2
|
-
require 'concurrent/hash'
|
3
|
-
require 'digest'
|
4
|
-
require 'fileutils'
|
5
|
-
require 'net/http'
|
6
|
-
require 'tempfile'
|
7
|
-
|
8
|
-
module BELParser
|
9
|
-
module Resource
|
10
|
-
class HTTPCache
|
11
|
-
|
12
|
-
# Lock (per URL) ensures concurrent retrieval/caching is thread-safe.
|
13
|
-
URL_LOCKS = Concurrent::Hash.new do |hash, key|
|
14
|
-
hash[key] = Mutex.new
|
15
|
-
end
|
16
|
-
private_constant :URL_LOCKS
|
17
|
-
|
18
|
-
def initialize(base_dir="#{Dir.tmpdir}/bel-resources")
|
19
|
-
@base_dir = FileUtils.mkdir_p(base_dir).first
|
20
|
-
end
|
21
|
-
|
22
|
-
def get(url)
|
23
|
-
cached_url_path = File.join(@base_dir, hash_url(url))
|
24
|
-
URL_LOCKS[url].synchronize do
|
25
|
-
if File.exists?(cached_url_path)
|
26
|
-
if block_given?
|
27
|
-
File.open(cached_url_path) do |cached_file|
|
28
|
-
yield cached_file
|
29
|
-
end
|
30
|
-
else
|
31
|
-
File.open(cached_url_path)
|
32
|
-
end
|
33
|
-
else
|
34
|
-
uri = URI.parse(url)
|
35
|
-
begin
|
36
|
-
Net::HTTP.start(uri.host, uri.port) do |http|
|
37
|
-
http.request(Net::HTTP::Get.new(uri)) do |response|
|
38
|
-
|
39
|
-
if block_given?
|
40
|
-
if response.is_a?(Net::HTTPOK)
|
41
|
-
cached_file = File.open(cached_url_path, 'w')
|
42
|
-
response.read_body do |chunk|
|
43
|
-
cached_file.write(chunk)
|
44
|
-
yield StringIO.new(chunk)
|
45
|
-
end
|
46
|
-
else
|
47
|
-
yield nil
|
48
|
-
end
|
49
|
-
else
|
50
|
-
if response.is_a?(Net::HTTPOK)
|
51
|
-
cached_file = File.open(cached_url_path, 'w')
|
52
|
-
content = response.read_body
|
53
|
-
cached_file.write(content)
|
54
|
-
return StringIO.new(content)
|
55
|
-
else
|
56
|
-
return nil
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
rescue SocketError
|
62
|
-
if block_given?
|
63
|
-
yield nil
|
64
|
-
else
|
65
|
-
return nil
|
66
|
-
end
|
67
|
-
ensure
|
68
|
-
if defined? cached_file
|
69
|
-
cached_file.close
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
protected
|
77
|
-
|
78
|
-
def hash_url(url)
|
79
|
-
Base64.encode64(Digest::SHA1.digest(url)).delete("/=\n")
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
if __FILE__ == $PROGRAM_NAME
|
86
|
-
BELParser::Resource::HTTPCache.new.get(ARGV.first) do |io|
|
87
|
-
io.each_line { |line| puts line }
|
88
|
-
end
|
89
|
-
end
|
@@ -1,135 +0,0 @@
|
|
1
|
-
require 'open-uri'
|
2
|
-
require_relative 'reader'
|
3
|
-
require_relative 'eager_reader'
|
4
|
-
require_relative 'dataset'
|
5
|
-
require_relative 'http_cache'
|
6
|
-
require_relative 'value'
|
7
|
-
|
8
|
-
module BELParser
|
9
|
-
module Resource
|
10
|
-
# ResourceFileReader retrieves {Dataset datasets} and {Value values} from
|
11
|
-
# Annotation (i.e. belanno extension) and Namespace (i.e. belns extension)
|
12
|
-
# files.
|
13
|
-
#
|
14
|
-
# Only supports resource identifiers with an HTTP or HTTPS scheme.
|
15
|
-
class ResourceFileReader
|
16
|
-
include Reader
|
17
|
-
prepend EagerReader
|
18
|
-
|
19
|
-
DEFAULT_RESOURCE_VALUE_DELIMITER = '|'
|
20
|
-
private_constant :DEFAULT_RESOURCE_VALUE_DELIMITER
|
21
|
-
|
22
|
-
def initialize
|
23
|
-
@http_cache = HTTPCache.new
|
24
|
-
end
|
25
|
-
|
26
|
-
def retrieve_resource(resource_identifier)
|
27
|
-
read_resource(resource_identifier)
|
28
|
-
end
|
29
|
-
|
30
|
-
def retrieve_value_from_resource(resource_identifier, value)
|
31
|
-
file_resource = read_resource(resource_identifier)
|
32
|
-
|
33
|
-
io = @http_cache.get(resource_identifier)
|
34
|
-
delimiter = DEFAULT_RESOURCE_VALUE_DELIMITER
|
35
|
-
io.each_line do |line|
|
36
|
-
case line
|
37
|
-
when /^DelimiterString *=(.*)/
|
38
|
-
delimiter = Regexp.last_match[1]
|
39
|
-
when /^#{Regexp.escape('[Values]')}/
|
40
|
-
break
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
value_match = "#{value}#{delimiter}"
|
45
|
-
value_line = io.each_line.find do |line|
|
46
|
-
line.start_with?(value_match)
|
47
|
-
end
|
48
|
-
return nil unless value_line
|
49
|
-
value, encoding = value_line.strip.split(delimiter)
|
50
|
-
FileResourceValue.new(file_resource, value, encoding)
|
51
|
-
end
|
52
|
-
|
53
|
-
def retrieve_values_from_resource(resource_identifier)
|
54
|
-
file_resource = read_resource(resource_identifier)
|
55
|
-
|
56
|
-
io = @http_cache.get(resource_identifier)
|
57
|
-
delimiter = DEFAULT_RESOURCE_VALUE_DELIMITER
|
58
|
-
io.each_line do |line|
|
59
|
-
case line
|
60
|
-
when /^DelimiterString *=(.*)/
|
61
|
-
delimiter = Regexp.last_match[1]
|
62
|
-
when /^#{Regexp.escape('[Values]')}/
|
63
|
-
break
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
io.each_line.map do |line|
|
68
|
-
value, encoding = line.strip.split(delimiter)
|
69
|
-
FileResourceValue.new(file_resource, value, encoding)
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
private
|
74
|
-
|
75
|
-
def read_resource(url)
|
76
|
-
io = @http_cache.get(url)
|
77
|
-
return nil unless io
|
78
|
-
|
79
|
-
type, name, keyword, domain = nil
|
80
|
-
io.each_line do |line|
|
81
|
-
case line
|
82
|
-
when /^#{Regexp.escape('[AnnotationDefinition]')}/
|
83
|
-
type = Dataset::ANNOTATION
|
84
|
-
when /^#{Regexp.escape('[Namespace]')}/
|
85
|
-
type = Dataset::NAMESPACE
|
86
|
-
when /^NameString *= *(.*)$/
|
87
|
-
name = Regexp.last_match[1]
|
88
|
-
when /^Keyword *= *(.*)$/
|
89
|
-
keyword = Regexp.last_match[1]
|
90
|
-
when /^DomainString *= *(.*)$/
|
91
|
-
domain = Regexp.last_match[1]
|
92
|
-
when /^#{Regexp.escape('[Values]')}/
|
93
|
-
break
|
94
|
-
end
|
95
|
-
end
|
96
|
-
FileResource.new(url, domain, keyword, name, type)
|
97
|
-
end
|
98
|
-
|
99
|
-
# FileResource is a {Dataset} description obtained from a Annotation or
|
100
|
-
# Namespace file.
|
101
|
-
class FileResource
|
102
|
-
include Dataset
|
103
|
-
|
104
|
-
attr_reader :identifier, :domain, :keyword, :name, :types
|
105
|
-
|
106
|
-
def initialize(identifier, domain, keyword, name, type)
|
107
|
-
@identifier = identifier.to_s
|
108
|
-
@domain = domain.to_s
|
109
|
-
@keyword = keyword.to_s
|
110
|
-
@name = name.to_s
|
111
|
-
@types = [type]
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
# FileResourceValue is a {Value} obtained from a Annotation or Namespace
|
116
|
-
# file.
|
117
|
-
class FileResourceValue
|
118
|
-
include Value
|
119
|
-
|
120
|
-
attr_reader :dataset, :name, :encodings
|
121
|
-
|
122
|
-
def initialize(dataset, name, encodings)
|
123
|
-
@dataset = dataset
|
124
|
-
@name = name
|
125
|
-
@encodings =
|
126
|
-
if dataset.annotation_resource? && !dataset.namespace_resource?
|
127
|
-
nil
|
128
|
-
else
|
129
|
-
encodings.chars.map(&:to_sym)
|
130
|
-
end
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|