bel_parser 1.0.0.alpha.16 → 1.0.0.alpha.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/bin/bel2_validator +11 -3
- data/bin/bel_script_reader +11 -3
- data/lib/bel_parser/expression/validator.rb +2 -2
- data/lib/bel_parser/resource/eager_reader.rb +41 -27
- data/lib/bel_parser/resource/file_resource.rb +21 -0
- data/lib/bel_parser/resource/file_resource_value.rb +24 -0
- data/lib/bel_parser/resource/lru_cache.rb +1 -1
- data/lib/bel_parser/resource/lru_reader.rb +14 -15
- data/lib/bel_parser/resource/resource_url_reader.rb +160 -0
- data/lib/bel_parser/script/validator.rb +4 -2
- metadata +4 -3
- data/lib/bel_parser/resource/http_cache.rb +0 -89
- data/lib/bel_parser/resource/resource_file_reader.rb +0 -135
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21d0de137c850359607608e5cdca2a4481934fd5
|
4
|
+
data.tar.gz: aebe4a51435a51ee91d410a2c5ff0753ec3d51d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51d92527013676307ee3307496f1282fe838a9f57b138e1d74d6dcdb49cd7061fd913c26111422f9e1fcad3c61dcbd1ead176996ec90e20acc2e272b49de710b
|
7
|
+
data.tar.gz: edf6a891d9d6d9ba08dcef2cf4f6090cba96b040b931c8ee7aa39296bc7a6a305b81b45c2e617254b9b293a92a0e90db80756327b2b6e90430b1c8b68caeb184
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.0.alpha.16
|
1
|
+
1.0.0.alpha.17
|
data/bin/bel2_validator
CHANGED
@@ -9,7 +9,6 @@ unless ARGV.first
|
|
9
9
|
USAGE
|
10
10
|
exit 1
|
11
11
|
end
|
12
|
-
namespaces = Hash[ARGV[1..-1].map { |ns| ns.split('=') }]
|
13
12
|
|
14
13
|
def syntax_results(results)
|
15
14
|
results.select do |res|
|
@@ -25,9 +24,18 @@ end
|
|
25
24
|
|
26
25
|
require 'bel_parser'
|
27
26
|
require 'bel_parser/expression/validator'
|
28
|
-
require 'bel_parser/resource/resource_file_reader'
|
27
|
+
require 'bel_parser/resource/resource_url_reader'
|
28
|
+
|
29
|
+
resource_reader = BELParser::Resource::ResourceURLReader.new(true)
|
30
|
+
namespaces =
|
31
|
+
Hash[
|
32
|
+
ARGV.map do |ns|
|
33
|
+
prefix, identifier = ns.split('=')
|
34
|
+
dataset = resource_reader.retrieve_resource(identifier)
|
35
|
+
dataset ? [prefix, dataset] : nil
|
36
|
+
end.compact
|
37
|
+
]
|
29
38
|
|
30
|
-
resource_reader = BELParser::Resource::ResourceFileReader.new
|
31
39
|
BELParser::Expression::Validator
|
32
40
|
.new(ARGV.first, namespaces, resource_reader)
|
33
41
|
.each($stdin) do |(line_number, line, ast, results)|
|
data/bin/bel_script_reader
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(
|
|
4
4
|
|
5
5
|
require 'optparse'
|
6
6
|
require 'bel_parser'
|
7
|
-
require 'bel_parser/resource/resource_file_reader'
|
7
|
+
require 'bel_parser/resource/resource_url_reader'
|
8
8
|
|
9
9
|
options = {
|
10
10
|
spec: BELParser::Language.specification(
|
@@ -37,6 +37,14 @@ OptionParser.new do |opts|
|
|
37
37
|
|
38
38
|
options[:spec] = BELParser::Language.specification(spec)
|
39
39
|
end
|
40
|
+
|
41
|
+
opts.on(
|
42
|
+
'-r',
|
43
|
+
'--[no-]reuse-resource-databases',
|
44
|
+
'Enable to reuse resource databases if they exist.') do |reuse|
|
45
|
+
|
46
|
+
options[:reuse] = reuse
|
47
|
+
end
|
40
48
|
end.parse!
|
41
49
|
|
42
50
|
file, spec = options.values_at(:file, :spec)
|
@@ -70,7 +78,7 @@ SYN_ERR = BELParser::Language::Syntax::SyntaxError
|
|
70
78
|
SYN_WARN = BELParser::Language::Syntax::SyntaxWarning
|
71
79
|
SEM_WARN = BELParser::Language::Semantics::SemanticsWarning
|
72
80
|
|
73
|
-
rr
|
81
|
+
rr = BELParser::Resource::ResourceURLReader.new(options[:reuse])
|
74
82
|
namespaces = Hash[
|
75
83
|
ARGV.map do |ns|
|
76
84
|
prefix, identifier = ns.split('=')
|
@@ -80,7 +88,7 @@ namespaces = Hash[
|
|
80
88
|
]
|
81
89
|
|
82
90
|
initial_state = {
|
83
|
-
resource_reader:
|
91
|
+
resource_reader: rr,
|
84
92
|
specification: spec,
|
85
93
|
namespace_definitions: namespaces
|
86
94
|
}
|
@@ -49,9 +49,9 @@ if __FILE__ == $PROGRAM_NAME
|
|
49
49
|
exit 1
|
50
50
|
end
|
51
51
|
|
52
|
-
require 'bel_parser/resource/resource_file_reader'
|
52
|
+
require 'bel_parser/resource/resource_url_reader'
|
53
53
|
|
54
|
-
resource_reader = BELParser::Resource::ResourceFileReader.new
|
54
|
+
resource_reader = BELParser::Resource::ResourceURLReader.new
|
55
55
|
namespaces =
|
56
56
|
Hash[
|
57
57
|
ARGV[1..-1]
|
@@ -15,61 +15,75 @@ module BELParser
|
|
15
15
|
@locks ||= Concurrent::Hash.new
|
16
16
|
end
|
17
17
|
|
18
|
+
def load_threads
|
19
|
+
@load_threads ||= Concurrent::Hash.new
|
20
|
+
end
|
21
|
+
|
22
|
+
def load_values(identifier)
|
23
|
+
lock = locks[identifier] ||= Mutex.new
|
24
|
+
if identifier.include?('taxonomy')
|
25
|
+
puts "load_all_values, try lock..."
|
26
|
+
end
|
27
|
+
if lock.try_lock
|
28
|
+
load_threads[identifier] = Thread.new do
|
29
|
+
value_hash = {
|
30
|
+
:name => {},
|
31
|
+
:identifier => {},
|
32
|
+
:title => {},
|
33
|
+
:synonyms => {}
|
34
|
+
}
|
35
|
+
retrieve_values_from_resource(identifier).each do |concept|
|
36
|
+
value_hash[:name][concept.name] = concept
|
37
|
+
value_hash[:identifier][concept.identifier] = concept
|
38
|
+
value_hash[:title][concept.title] = concept
|
39
|
+
concept.synonyms.each do |synonym|
|
40
|
+
value_hash[:synonyms][synonym] = concept
|
41
|
+
end
|
42
|
+
end
|
43
|
+
resources[identifier] = value_hash
|
44
|
+
lock.unlock
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
18
49
|
def retrieve_resource(resource_identifier)
|
19
50
|
if !resources.key?(resource_identifier)
|
20
51
|
load_all_values(resource_identifier)
|
21
52
|
end
|
22
|
-
super
|
23
53
|
end
|
24
54
|
|
25
55
|
def retrieve_value_from_resource(resource_identifier, value)
|
26
56
|
if !resources.key?(resource_identifier)
|
27
57
|
load_all_values(resource_identifier)
|
58
|
+
if load_threads.key?(resource_identifier)
|
59
|
+
load_thread = load_threads[resource_identifier]
|
60
|
+
load_thread.join unless load_thread == Thread.current
|
61
|
+
end
|
28
62
|
end
|
29
63
|
|
30
64
|
concepts = resources[resource_identifier]
|
31
|
-
return
|
65
|
+
return nil unless concepts
|
32
66
|
CACHE_KEYS.each do |key|
|
33
67
|
cached_concept = concepts[key].fetch(value, nil)
|
34
68
|
return cached_concept if cached_concept
|
35
69
|
end
|
36
|
-
|
37
70
|
nil
|
38
71
|
end
|
39
72
|
|
40
73
|
def retrieve_values_from_resource(resource_identifier)
|
41
74
|
if !resources.key?(resource_identifier)
|
42
75
|
load_all_values(resource_identifier)
|
76
|
+
if load_threads.key?(resource_identifier)
|
77
|
+
load_thread = load_threads[resource_identifier]
|
78
|
+
load_thread.join unless load_thread == Thread.current
|
79
|
+
end
|
43
80
|
end
|
44
81
|
|
45
82
|
concepts = resources[resource_identifier]
|
46
|
-
return
|
83
|
+
return nil unless concepts
|
47
84
|
concepts[:name].values
|
48
85
|
end
|
49
86
|
|
50
|
-
def load_all_values(identifier)
|
51
|
-
lock = locks[identifier] ||= Mutex.new
|
52
|
-
if lock.try_lock
|
53
|
-
Thread.new do
|
54
|
-
value_hash = {
|
55
|
-
:name => {},
|
56
|
-
:identifier => {},
|
57
|
-
:title => {},
|
58
|
-
:synonyms => {}
|
59
|
-
}
|
60
|
-
retrieve_values_from_resource(identifier).each do |concept|
|
61
|
-
value_hash[:name][concept.name] = concept
|
62
|
-
value_hash[:identifier][concept.identifier] = concept
|
63
|
-
value_hash[:title][concept.title] = concept
|
64
|
-
concept.synonyms.each do |synonym|
|
65
|
-
value_hash[:synonyms][synonym] = concept
|
66
|
-
end
|
67
|
-
end
|
68
|
-
resources[identifier] = value_hash
|
69
|
-
lock.unlock
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
87
|
end
|
74
88
|
end
|
75
89
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require_relative 'dataset'
|
2
|
+
|
3
|
+
# FileResource is a {Dataset} description obtained from a Annotation or
|
4
|
+
# Namespace file.
|
5
|
+
module BELParser
|
6
|
+
module Resource
|
7
|
+
class FileResource
|
8
|
+
include Dataset
|
9
|
+
|
10
|
+
attr_reader :identifier, :domain, :keyword, :name, :types
|
11
|
+
|
12
|
+
def initialize(identifier, domain, keyword, name, type)
|
13
|
+
@identifier = identifier.to_s
|
14
|
+
@domain = domain.to_s
|
15
|
+
@keyword = keyword.to_s
|
16
|
+
@name = name.to_s
|
17
|
+
@types = [type]
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative 'value'
|
2
|
+
|
3
|
+
# FileResourceValue is a {Value} obtained from a Annotation or Namespace
|
4
|
+
# file.
|
5
|
+
module BELParser
|
6
|
+
module Resource
|
7
|
+
class FileResourceValue
|
8
|
+
include Value
|
9
|
+
|
10
|
+
attr_reader :dataset, :name, :encodings
|
11
|
+
|
12
|
+
def initialize(dataset, name, encodings)
|
13
|
+
@dataset = dataset
|
14
|
+
@name = name
|
15
|
+
@encodings =
|
16
|
+
if dataset.annotation_resource? && !dataset.namespace_resource?
|
17
|
+
nil
|
18
|
+
else
|
19
|
+
encodings.chars.map(&:to_sym)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -3,7 +3,7 @@ require 'concurrent/hash'
|
|
3
3
|
module BELParser
|
4
4
|
module Resource
|
5
5
|
# LRUCache implements a least recently used cache. This implementation was
|
6
|
-
#
|
6
|
+
# adapted from https://github.com/SamSaffron/lru_redux.
|
7
7
|
class LRUCache
|
8
8
|
def initialize(max_size)
|
9
9
|
raise ArgumentError.new(:max_size) if max_size < 1
|
@@ -4,30 +4,29 @@ module BELParser
|
|
4
4
|
module Resource
|
5
5
|
module LRUReader
|
6
6
|
|
7
|
-
LRU_MAX_SIZE =
|
7
|
+
LRU_MAX_SIZE = 2000
|
8
8
|
private_constant :LRU_MAX_SIZE
|
9
9
|
|
10
|
-
def resources
|
11
|
-
@resources ||= Concurrent::Hash.new
|
12
|
-
end
|
13
|
-
|
14
10
|
def retrieve_resource(resource_identifier)
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
11
|
+
@resources ||= Hash.new { |hash, key| hash[key] = {} }
|
12
|
+
cached_dataset = @resources[resource_identifier][:dataset]
|
13
|
+
return cached_dataset if cached_dataset
|
14
|
+
|
15
|
+
resolved_dataset = super
|
16
|
+
@resources[resource_identifier][:dataset] = resolved_dataset
|
17
|
+
@resources[resource_identifier][:values] = LRUCache.new(LRU_MAX_SIZE)
|
18
|
+
resolved_dataset
|
19
19
|
end
|
20
20
|
|
21
21
|
def retrieve_value_from_resource(resource_identifier, value)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
concepts = resources[resource_identifier]
|
27
|
-
concepts.getset(value) { super }
|
22
|
+
retrieve_resource(resource_identifier)
|
23
|
+
@resources[resource_identifier][:values].getset(value) {
|
24
|
+
super
|
25
|
+
}
|
28
26
|
end
|
29
27
|
|
30
28
|
def retrieve_values_from_resource(resource_identifier)
|
29
|
+
retrieve_resource(resource_identifier)
|
31
30
|
super
|
32
31
|
end
|
33
32
|
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
require 'base64'
|
2
|
+
require 'digest'
|
3
|
+
require 'gdbm'
|
4
|
+
require 'net/http'
|
5
|
+
require 'tempfile'
|
6
|
+
require 'uri'
|
7
|
+
require_relative 'reader'
|
8
|
+
require_relative 'file_resource'
|
9
|
+
require_relative 'file_resource_value'
|
10
|
+
|
11
|
+
module BELParser
|
12
|
+
module Resource
|
13
|
+
# ResourceURLReader retrieves {Dataset datasets} and {Value values} from
|
14
|
+
# Annotation (i.e. belanno extension) and Namespace (i.e. belns extension)
|
15
|
+
# files. Value and encoding are stored in GDBM database files to reduce the
|
16
|
+
# runtime memory usage (22 resources loaded, totaling 100MB memory usage).
|
17
|
+
#
|
18
|
+
# Only supports resource identifiers with an HTTP or HTTPS scheme.
|
19
|
+
class ResourceURLReader
|
20
|
+
include Reader
|
21
|
+
|
22
|
+
DEFAULT_RESOURCE_VALUE_DELIMITER = '|'
|
23
|
+
private_constant :DEFAULT_RESOURCE_VALUE_DELIMITER
|
24
|
+
|
25
|
+
def initialize(reuse_database_files = false)
|
26
|
+
@resources = {}
|
27
|
+
@datasets = ::GDBM.new(_temporary_datasets_file)
|
28
|
+
@reuse = reuse_database_files
|
29
|
+
end
|
30
|
+
|
31
|
+
def retrieve_resource(resource_identifier)
|
32
|
+
read_resource(resource_identifier)[:dataset]
|
33
|
+
end
|
34
|
+
|
35
|
+
def retrieve_value_from_resource(resource_identifier, value)
|
36
|
+
resource = read_resource(resource_identifier)
|
37
|
+
encoding = resource[:values][value]
|
38
|
+
return nil unless encoding
|
39
|
+
FileResourceValue.new(resource[:dataset], value, encoding)
|
40
|
+
end
|
41
|
+
|
42
|
+
def retrieve_values_from_resource(resource_identifier)
|
43
|
+
resource = read_resource(resource_identifier)
|
44
|
+
dataset = resource[:dataset]
|
45
|
+
resource[:values].lazy.map do |value, encoding|
|
46
|
+
FileResourceValue.new(dataset, value, encoding)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
protected
|
51
|
+
|
52
|
+
def read_resource(url)
|
53
|
+
# return cached resource if present
|
54
|
+
resource = @resources[url]
|
55
|
+
return resource if resource
|
56
|
+
|
57
|
+
# read url
|
58
|
+
content = http_get(url)
|
59
|
+
unless content
|
60
|
+
# cache and return empty resource if not resolvable
|
61
|
+
return @resources[url] = empty_resource
|
62
|
+
end
|
63
|
+
|
64
|
+
@resources[url] = create_resource(url, content.each_line)
|
65
|
+
end
|
66
|
+
|
67
|
+
def create_resource(url, line_enum)
|
68
|
+
delimiter = DEFAULT_RESOURCE_VALUE_DELIMITER
|
69
|
+
dataset = @datasets[url]
|
70
|
+
value_database_file = _temporary_database_file(url)
|
71
|
+
values = ::GDBM.new(value_database_file)
|
72
|
+
|
73
|
+
if @reuse && dataset && values.size > 0
|
74
|
+
warn(
|
75
|
+
<<-MSG.gsub(/^ {14}/, '')
|
76
|
+
Warning - Reusing value database.
|
77
|
+
URL: #{url}
|
78
|
+
File: #{value_database_file}
|
79
|
+
MSG
|
80
|
+
)
|
81
|
+
return {
|
82
|
+
dataset: FileResource.new(url, *dataset.split('//')),
|
83
|
+
values: values
|
84
|
+
}
|
85
|
+
end
|
86
|
+
|
87
|
+
value_section = false
|
88
|
+
type, name, keyword, domain = nil
|
89
|
+
line_enum.each do |line|
|
90
|
+
line.strip!
|
91
|
+
case
|
92
|
+
when line =~ /^#{Regexp.escape('[AnnotationDefinition]')}/
|
93
|
+
type = Dataset::ANNOTATION
|
94
|
+
when line =~ /^#{Regexp.escape('[Namespace]')}/
|
95
|
+
type = Dataset::NAMESPACE
|
96
|
+
when line =~ /^NameString *= *(.*)$/
|
97
|
+
name = Regexp.last_match[1]
|
98
|
+
when line =~ /^Keyword *= *(.*)$/
|
99
|
+
keyword = Regexp.last_match[1]
|
100
|
+
when line =~ /^DomainString *= *(.*)$/
|
101
|
+
domain = Regexp.last_match[1]
|
102
|
+
when line =~ /^DelimiterString *=(.*)$/
|
103
|
+
delimiter = Regexp.last_match[1]
|
104
|
+
when line =~ /^#{Regexp.escape('[Values]')}/
|
105
|
+
dataset = FileResource.new(url, domain, keyword, name, type)
|
106
|
+
value_section = true
|
107
|
+
when value_section
|
108
|
+
value, encoding = line.strip.split(delimiter)
|
109
|
+
values[value.to_s] = encoding.to_s
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
resource = {
|
114
|
+
dataset: FileResource.new(url, domain, keyword, name, type),
|
115
|
+
values: values
|
116
|
+
}
|
117
|
+
@datasets[url] = [domain, keyword, name, type].join('//')
|
118
|
+
resource
|
119
|
+
end
|
120
|
+
|
121
|
+
def empty_resource
|
122
|
+
{ dataset: nil, values: {} }
|
123
|
+
end
|
124
|
+
|
125
|
+
def http_get(url)
|
126
|
+
begin
|
127
|
+
_get(URI.parse(url))
|
128
|
+
rescue URI::InvalidURIError, SocketError
|
129
|
+
return nil
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
private
|
134
|
+
|
135
|
+
def _get(url, &block)
|
136
|
+
Net::HTTP.start(url.host, url.port) do |http|
|
137
|
+
http.request(Net::HTTP::Get.new(url)) do |response|
|
138
|
+
return response.read_body
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
def _hash_url(url)
|
144
|
+
Base64.encode64(Digest::SHA1.digest(url)).delete("/=\n")
|
145
|
+
end
|
146
|
+
|
147
|
+
def _temporary_datasets_file
|
148
|
+
resource_directory = File.join(Dir.tmpdir, 'belresources')
|
149
|
+
FileUtils.mkdir_p(resource_directory)
|
150
|
+
File.join(resource_directory, 'datasets.gdbm')
|
151
|
+
end
|
152
|
+
|
153
|
+
def _temporary_database_file(url)
|
154
|
+
resource_directory = File.join(Dir.tmpdir, 'belresources')
|
155
|
+
FileUtils.mkdir_p(resource_directory)
|
156
|
+
File.join(resource_directory, "#{_hash_url(url)}.gdbm")
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
@@ -98,10 +98,12 @@ end
|
|
98
98
|
if __FILE__ == $PROGRAM_NAME
|
99
99
|
$LOAD_PATH.unshift(
|
100
100
|
File.join(File.expand_path(File.dirname(__FILE__)), '..', '..'))
|
101
|
+
|
101
102
|
require 'bel_parser/language'
|
102
|
-
require 'bel_parser/resource/resource_file_reader'
|
103
|
+
require 'bel_parser/resource/resource_url_reader'
|
104
|
+
|
103
105
|
initial_state = {
|
104
|
-
resource_reader: BELParser::Resource::ResourceFileReader.new,
|
106
|
+
resource_reader: BELParser::Resource::ResourceURLReader.new,
|
105
107
|
specification: BELParser::Language.specification(
|
106
108
|
BELParser::Language.latest_supported_version
|
107
109
|
)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bel_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0.alpha.16
|
4
|
+
version: 1.0.0.alpha.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anthony Bargnesi
|
@@ -304,12 +304,13 @@ files:
|
|
304
304
|
- lib/bel_parser/resource/dataset.rb
|
305
305
|
- lib/bel_parser/resource/eager_reader.rb
|
306
306
|
- lib/bel_parser/resource/eager_sparql_reader.rb
|
307
|
-
- lib/bel_parser/resource/http_cache.rb
|
307
|
+
- lib/bel_parser/resource/file_resource.rb
|
308
|
+
- lib/bel_parser/resource/file_resource_value.rb
|
308
309
|
- lib/bel_parser/resource/jena_tdb_reader.rb
|
309
310
|
- lib/bel_parser/resource/lru_cache.rb
|
310
311
|
- lib/bel_parser/resource/lru_reader.rb
|
311
312
|
- lib/bel_parser/resource/reader.rb
|
312
|
-
- lib/bel_parser/resource/resource_file_reader.rb
|
313
|
+
- lib/bel_parser/resource/resource_url_reader.rb
|
313
314
|
- lib/bel_parser/resource/sparql_reader.rb
|
314
315
|
- lib/bel_parser/resource/value.rb
|
315
316
|
- lib/bel_parser/script.rb
|
@@ -1,89 +0,0 @@
|
|
1
|
-
require 'base64'
|
2
|
-
require 'concurrent/hash'
|
3
|
-
require 'digest'
|
4
|
-
require 'fileutils'
|
5
|
-
require 'net/http'
|
6
|
-
require 'tempfile'
|
7
|
-
|
8
|
-
module BELParser
|
9
|
-
module Resource
|
10
|
-
class HTTPCache
|
11
|
-
|
12
|
-
# Lock (per URL) ensures concurrent retrieval/caching is thread-safe.
|
13
|
-
URL_LOCKS = Concurrent::Hash.new do |hash, key|
|
14
|
-
hash[key] = Mutex.new
|
15
|
-
end
|
16
|
-
private_constant :URL_LOCKS
|
17
|
-
|
18
|
-
def initialize(base_dir="#{Dir.tmpdir}/bel-resources")
|
19
|
-
@base_dir = FileUtils.mkdir_p(base_dir).first
|
20
|
-
end
|
21
|
-
|
22
|
-
def get(url)
|
23
|
-
cached_url_path = File.join(@base_dir, hash_url(url))
|
24
|
-
URL_LOCKS[url].synchronize do
|
25
|
-
if File.exists?(cached_url_path)
|
26
|
-
if block_given?
|
27
|
-
File.open(cached_url_path) do |cached_file|
|
28
|
-
yield cached_file
|
29
|
-
end
|
30
|
-
else
|
31
|
-
File.open(cached_url_path)
|
32
|
-
end
|
33
|
-
else
|
34
|
-
uri = URI.parse(url)
|
35
|
-
begin
|
36
|
-
Net::HTTP.start(uri.host, uri.port) do |http|
|
37
|
-
http.request(Net::HTTP::Get.new(uri)) do |response|
|
38
|
-
|
39
|
-
if block_given?
|
40
|
-
if response.is_a?(Net::HTTPOK)
|
41
|
-
cached_file = File.open(cached_url_path, 'w')
|
42
|
-
response.read_body do |chunk|
|
43
|
-
cached_file.write(chunk)
|
44
|
-
yield StringIO.new(chunk)
|
45
|
-
end
|
46
|
-
else
|
47
|
-
yield nil
|
48
|
-
end
|
49
|
-
else
|
50
|
-
if response.is_a?(Net::HTTPOK)
|
51
|
-
cached_file = File.open(cached_url_path, 'w')
|
52
|
-
content = response.read_body
|
53
|
-
cached_file.write(content)
|
54
|
-
return StringIO.new(content)
|
55
|
-
else
|
56
|
-
return nil
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
rescue SocketError
|
62
|
-
if block_given?
|
63
|
-
yield nil
|
64
|
-
else
|
65
|
-
return nil
|
66
|
-
end
|
67
|
-
ensure
|
68
|
-
if defined? cached_file
|
69
|
-
cached_file.close
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
protected
|
77
|
-
|
78
|
-
def hash_url(url)
|
79
|
-
Base64.encode64(Digest::SHA1.digest(url)).delete("/=\n")
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
if __FILE__ == $PROGRAM_NAME
|
86
|
-
BELParser::Resource::HTTPCache.new.get(ARGV.first) do |io|
|
87
|
-
io.each_line { |line| puts line }
|
88
|
-
end
|
89
|
-
end
|
@@ -1,135 +0,0 @@
|
|
1
|
-
require 'open-uri'
|
2
|
-
require_relative 'reader'
|
3
|
-
require_relative 'eager_reader'
|
4
|
-
require_relative 'dataset'
|
5
|
-
require_relative 'http_cache'
|
6
|
-
require_relative 'value'
|
7
|
-
|
8
|
-
module BELParser
|
9
|
-
module Resource
|
10
|
-
# ResourceFileReader retrieves {Dataset datasets} and {Value values} from
|
11
|
-
# Annotation (i.e. belanno extension) and Namespace (i.e. belns extension)
|
12
|
-
# files.
|
13
|
-
#
|
14
|
-
# Only supports resource identifiers with an HTTP or HTTPS scheme.
|
15
|
-
class ResourceFileReader
|
16
|
-
include Reader
|
17
|
-
prepend EagerReader
|
18
|
-
|
19
|
-
DEFAULT_RESOURCE_VALUE_DELIMITER = '|'
|
20
|
-
private_constant :DEFAULT_RESOURCE_VALUE_DELIMITER
|
21
|
-
|
22
|
-
def initialize
|
23
|
-
@http_cache = HTTPCache.new
|
24
|
-
end
|
25
|
-
|
26
|
-
def retrieve_resource(resource_identifier)
|
27
|
-
read_resource(resource_identifier)
|
28
|
-
end
|
29
|
-
|
30
|
-
def retrieve_value_from_resource(resource_identifier, value)
|
31
|
-
file_resource = read_resource(resource_identifier)
|
32
|
-
|
33
|
-
io = @http_cache.get(resource_identifier)
|
34
|
-
delimiter = DEFAULT_RESOURCE_VALUE_DELIMITER
|
35
|
-
io.each_line do |line|
|
36
|
-
case line
|
37
|
-
when /^DelimiterString *=(.*)/
|
38
|
-
delimiter = Regexp.last_match[1]
|
39
|
-
when /^#{Regexp.escape('[Values]')}/
|
40
|
-
break
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
value_match = "#{value}#{delimiter}"
|
45
|
-
value_line = io.each_line.find do |line|
|
46
|
-
line.start_with?(value_match)
|
47
|
-
end
|
48
|
-
return nil unless value_line
|
49
|
-
value, encoding = value_line.strip.split(delimiter)
|
50
|
-
FileResourceValue.new(file_resource, value, encoding)
|
51
|
-
end
|
52
|
-
|
53
|
-
def retrieve_values_from_resource(resource_identifier)
|
54
|
-
file_resource = read_resource(resource_identifier)
|
55
|
-
|
56
|
-
io = @http_cache.get(resource_identifier)
|
57
|
-
delimiter = DEFAULT_RESOURCE_VALUE_DELIMITER
|
58
|
-
io.each_line do |line|
|
59
|
-
case line
|
60
|
-
when /^DelimiterString *=(.*)/
|
61
|
-
delimiter = Regexp.last_match[1]
|
62
|
-
when /^#{Regexp.escape('[Values]')}/
|
63
|
-
break
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
io.each_line.map do |line|
|
68
|
-
value, encoding = line.strip.split(delimiter)
|
69
|
-
FileResourceValue.new(file_resource, value, encoding)
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
private
|
74
|
-
|
75
|
-
def read_resource(url)
|
76
|
-
io = @http_cache.get(url)
|
77
|
-
return nil unless io
|
78
|
-
|
79
|
-
type, name, keyword, domain = nil
|
80
|
-
io.each_line do |line|
|
81
|
-
case line
|
82
|
-
when /^#{Regexp.escape('[AnnotationDefinition]')}/
|
83
|
-
type = Dataset::ANNOTATION
|
84
|
-
when /^#{Regexp.escape('[Namespace]')}/
|
85
|
-
type = Dataset::NAMESPACE
|
86
|
-
when /^NameString *= *(.*)$/
|
87
|
-
name = Regexp.last_match[1]
|
88
|
-
when /^Keyword *= *(.*)$/
|
89
|
-
keyword = Regexp.last_match[1]
|
90
|
-
when /^DomainString *= *(.*)$/
|
91
|
-
domain = Regexp.last_match[1]
|
92
|
-
when /^#{Regexp.escape('[Values]')}/
|
93
|
-
break
|
94
|
-
end
|
95
|
-
end
|
96
|
-
FileResource.new(url, domain, keyword, name, type)
|
97
|
-
end
|
98
|
-
|
99
|
-
# FileResource is a {Dataset} description obtained from a Annotation or
|
100
|
-
# Namespace file.
|
101
|
-
class FileResource
|
102
|
-
include Dataset
|
103
|
-
|
104
|
-
attr_reader :identifier, :domain, :keyword, :name, :types
|
105
|
-
|
106
|
-
def initialize(identifier, domain, keyword, name, type)
|
107
|
-
@identifier = identifier.to_s
|
108
|
-
@domain = domain.to_s
|
109
|
-
@keyword = keyword.to_s
|
110
|
-
@name = name.to_s
|
111
|
-
@types = [type]
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
# FileResourceValue is a {Value} obtained from a Annotation or Namespace
|
116
|
-
# file.
|
117
|
-
class FileResourceValue
|
118
|
-
include Value
|
119
|
-
|
120
|
-
attr_reader :dataset, :name, :encodings
|
121
|
-
|
122
|
-
def initialize(dataset, name, encodings)
|
123
|
-
@dataset = dataset
|
124
|
-
@name = name
|
125
|
-
@encodings =
|
126
|
-
if dataset.annotation_resource? && !dataset.namespace_resource?
|
127
|
-
nil
|
128
|
-
else
|
129
|
-
encodings.chars.map(&:to_sym)
|
130
|
-
end
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|