rbbt-text 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/corpus/corpus.rb +15 -6
- data/lib/rbbt/corpus/document.rb +100 -127
- data/lib/rbbt/corpus/document_repo.rb +72 -51
- data/lib/rbbt/ner/NER.rb +4 -4
- data/lib/rbbt/ner/abner.rb +5 -4
- data/lib/rbbt/ner/banner.rb +3 -3
- data/lib/rbbt/ner/chemical_tagger.rb +3 -3
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +45 -10
- data/lib/rbbt/ner/oscar3.rb +3 -3
- data/lib/rbbt/ner/oscar4.rb +3 -3
- data/lib/rbbt/ner/patterns.rb +15 -13
- data/lib/rbbt/ner/regexpNER.rb +3 -2
- data/lib/rbbt/ner/rnorm.rb +2 -2
- data/lib/rbbt/ner/rnorm/cue_index.rb +2 -2
- data/lib/rbbt/ner/{annotations.rb → segment.rb} +161 -109
- data/lib/rbbt/ner/{annotations → segment}/named_entity.rb +3 -11
- data/lib/rbbt/ner/segment/relationship.rb +20 -0
- data/lib/rbbt/ner/segment/segmented.rb +13 -0
- data/lib/rbbt/ner/segment/token.rb +24 -0
- data/lib/rbbt/ner/{annotations → segment}/transformed.rb +10 -10
- data/lib/rbbt/ner/token_trieNER.rb +30 -22
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +2 -1
- data/lib/rbbt/nlp/nlp.rb +23 -37
- data/test/rbbt/corpus/test_document.rb +39 -37
- data/test/rbbt/ner/segment/test_named_entity.rb +29 -0
- data/test/rbbt/ner/segment/test_segmented.rb +23 -0
- data/test/rbbt/ner/{annotations → segment}/test_transformed.rb +6 -6
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +15 -1
- data/test/rbbt/ner/test_patterns.rb +11 -12
- data/test/rbbt/ner/test_regexpNER.rb +5 -4
- data/test/rbbt/ner/test_segment.rb +101 -0
- data/test/rbbt/ner/test_token_trieNER.rb +8 -9
- data/test/test_helper.rb +6 -6
- metadata +40 -22
- data/lib/rbbt/ner/annotations/annotated.rb +0 -15
- data/lib/rbbt/ner/annotations/relations.rb +0 -25
- data/lib/rbbt/ner/annotations/token.rb +0 -28
- data/test/rbbt/ner/annotations/test_named_entity.rb +0 -14
- data/test/rbbt/ner/test_annotations.rb +0 -70
data/lib/rbbt/corpus/corpus.rb
CHANGED
@@ -7,17 +7,26 @@ class Corpus
|
|
7
7
|
@corpora_path = case
|
8
8
|
when corpora_path.nil?
|
9
9
|
Rbbt.corpora
|
10
|
-
when (not
|
11
|
-
|
10
|
+
when (not Path === corpora_path)
|
11
|
+
Path.setup(corpora_path)
|
12
12
|
else
|
13
13
|
corpora_path
|
14
14
|
end
|
15
15
|
|
16
|
-
@
|
16
|
+
@corpora_path = @corpora_path.find
|
17
17
|
@persistence_dir = File.join(@corpora_path, "annotations")
|
18
|
-
|
19
|
-
@
|
20
|
-
|
18
|
+
|
19
|
+
Misc.lock(@persistence_dir) do
|
20
|
+
@global_annotations = TSV.setup(Persist.open_tokyocabinet(File.join(@persistence_dir, "global_annotations"), false, :list), :key => "ID", :fields => ["Start", "End", "JSON", "Document ID", "Entity Type"])
|
21
|
+
@global_annotations.unnamed = true
|
22
|
+
@global_annotations.close
|
23
|
+
end
|
24
|
+
|
25
|
+
Misc.lock(@corpora_path.document_repo) do
|
26
|
+
@document_repo = DocumentRepo.open_tokyocabinet @corpora_path.document_repo, false
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
21
30
|
|
22
31
|
def persistence_for(docid)
|
23
32
|
File.join(persistence_dir, docid)
|
data/lib/rbbt/corpus/document.rb
CHANGED
@@ -1,19 +1,21 @@
|
|
1
|
-
require 'rbbt/ner/
|
2
|
-
require 'rbbt/
|
3
|
-
require 'rbbt/
|
1
|
+
require 'rbbt/ner/segment'
|
2
|
+
require 'rbbt/ner/segment/segmented'
|
3
|
+
require 'rbbt/tsv'
|
4
|
+
require 'rbbt/resource/path'
|
5
|
+
require 'rbbt/persist/tsv'
|
4
6
|
require 'rbbt/util/misc'
|
5
7
|
require 'json'
|
6
8
|
|
7
9
|
class Document
|
8
10
|
|
9
|
-
attr_accessor :text, :docid, :namespace, :id, :type, :hash, :
|
10
|
-
def initialize(
|
11
|
-
@
|
11
|
+
attr_accessor :text, :docid, :namespace, :id, :type, :hash, :segments, :segment_indeces, :persist_dir, :global_persistence
|
12
|
+
def initialize(persist_dir = nil, docid = nil, text = nil, global_persistence = nil)
|
13
|
+
@segments = {}
|
12
14
|
@segment_indeces = {}
|
13
15
|
|
14
|
-
if not
|
15
|
-
@
|
16
|
-
@
|
16
|
+
if not persist_dir.nil?
|
17
|
+
@persist_dir = persist_dir
|
18
|
+
@persist_dir = Path.setup(@persist_dir) if not Path == @persist_dir
|
17
19
|
end
|
18
20
|
|
19
21
|
@global_persistence = global_persistence
|
@@ -34,45 +36,6 @@ class Document
|
|
34
36
|
update_docid
|
35
37
|
end
|
36
38
|
|
37
|
-
def self.save_segment(segment, fields = nil)
|
38
|
-
if fields.nil?
|
39
|
-
eend = case segment.offset; when nil; nil; when -1; -1; else segment.end; end
|
40
|
-
[segment.offset, eend, segment.info.to_json]
|
41
|
-
else
|
42
|
-
eend = case segment.offset; when nil; nil; when -1; -1; else segment.end; end
|
43
|
-
info = segment.info
|
44
|
-
info["literal"] = segment.to_s.gsub(/\s/,' ')
|
45
|
-
info.extend IndiferentHash
|
46
|
-
[segment.offset, eend].concat info.values_at(*fields.collect{|f| f.downcase}).collect{|v| Array === v ? v * "|" : v}
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def self.load_segment(text, annotation, fields = nil)
|
51
|
-
if fields.nil?
|
52
|
-
start, eend, info = annotation.values_at 0,1,2
|
53
|
-
info = JSON.parse(info)
|
54
|
-
else
|
55
|
-
start, eend = annotation.values_at 0,1
|
56
|
-
info = Misc.process_to_hash(fields) do |fields| annotation.values_at(*fields.collect{|f| f.downcase}).collect{|v| v.index("|").nil? ? v : v.split("|")} end
|
57
|
-
end
|
58
|
-
|
59
|
-
Segment.load(text, start, eend, info, @docid)
|
60
|
-
end
|
61
|
-
|
62
|
-
def self.tsv(segments, fields = nil)
|
63
|
-
tsv = TSV.new({}, :list, :key => "ID", :fields => %w(Start End))
|
64
|
-
if fields.nil?
|
65
|
-
tsv.fields += ["Info"]
|
66
|
-
else
|
67
|
-
tsv.fields += fields
|
68
|
-
end
|
69
|
-
|
70
|
-
segments.each{|segment| tsv[segment.id] = Document.save_segment(segment, fields) unless segment.offset.nil?}
|
71
|
-
|
72
|
-
tsv
|
73
|
-
end
|
74
|
-
|
75
|
-
|
76
39
|
#{{{ PERSISTENCE
|
77
40
|
|
78
41
|
TSV_REPOS = {}
|
@@ -82,30 +45,28 @@ class Document
|
|
82
45
|
if not fields.nil?
|
83
46
|
fields = [fields] if not Array === fields
|
84
47
|
fields = fields.collect{|f| f.to_s}
|
85
|
-
FIELDS_FOR_ENTITY_PERSISTENCE[entity.to_s] = fields
|
48
|
+
FIELDS_FOR_ENTITY_PERSISTENCE[entity.to_s] = fields
|
86
49
|
end
|
87
50
|
|
88
51
|
self.class_eval <<-EOC
|
89
|
-
def load_with_persistence_#{entity}
|
52
|
+
def load_with_persistence_#{entity}(raw = false)
|
90
53
|
fields = FIELDS_FOR_ENTITY_PERSISTENCE["#{ entity }"]
|
91
54
|
|
92
|
-
|
93
|
-
:persistence_file => File.join(@persistence_dir, "#{ entity }")) do
|
55
|
+
tsv_file = File.join(@persist_dir.find, "#{ entity }")
|
94
56
|
|
95
|
-
|
96
|
-
if fields.nil?
|
97
|
-
tsv.fields += ["Info"]
|
98
|
-
else
|
99
|
-
tsv.fields += fields
|
100
|
-
end
|
57
|
+
return nil if raw == :check and File.exists? tsv_file
|
101
58
|
|
59
|
+
annotations = Persist.persist("Entity[#{ entity }]", :tsv, :file => tsv_file) do
|
102
60
|
segments = produce_#{entity}
|
103
|
-
|
104
|
-
|
105
|
-
tsv
|
61
|
+
tsv = Segment.tsv(segments, fields)
|
106
62
|
end
|
107
63
|
|
108
|
-
annotations
|
64
|
+
return annotations if raw
|
65
|
+
|
66
|
+
annotations.unnamed = true
|
67
|
+
annotations.collect{|id, annotation|
|
68
|
+
Segment.load_tsv_values(text, annotation, annotations.fields)
|
69
|
+
}
|
109
70
|
end
|
110
71
|
EOC
|
111
72
|
end
|
@@ -125,47 +86,43 @@ class Document
|
|
125
86
|
end
|
126
87
|
|
127
88
|
self.class_eval <<-EOC
|
128
|
-
def load_with_persistence_#{entity}
|
89
|
+
def load_with_persistence_#{entity}(raw = false)
|
129
90
|
repo = TSV_REPOS["#{ entity }"]
|
130
91
|
if repo.nil?
|
131
|
-
raise "No persistence file or
|
132
|
-
repo =
|
92
|
+
raise "No persistence file or persistence dir for persist_in_tsv" if persist_dir.nil?
|
93
|
+
repo = Persist.open_tokyocabinet(persist_dir.annotations_by_type.find, true, :marshal_tsv)
|
133
94
|
end
|
134
95
|
|
135
|
-
|
136
96
|
fields = FIELDS_FOR_ENTITY_PERSISTENCE["#{ entity }"]
|
137
|
-
|
138
97
|
if not repo.include? "#{ entity }"
|
139
|
-
|
140
|
-
if fields.nil?
|
141
|
-
tsv.fields += ["Info"]
|
142
|
-
else
|
143
|
-
tsv.fields += fields
|
144
|
-
end
|
145
|
-
|
146
|
-
produce_#{entity}.each{|segment| tsv[segment.id] = Document.save_segment(segment, fields) unless segment.offset.nil?}
|
98
|
+
segments = produce_#{entity}
|
147
99
|
repo.write
|
148
|
-
repo["#{entity}"] = tsv
|
100
|
+
repo["#{entity}"] = Segment.tsv(segments, fields)
|
149
101
|
repo.read
|
102
|
+
else
|
103
|
+
if raw == :check
|
104
|
+
repo.close
|
105
|
+
return nil
|
106
|
+
end
|
150
107
|
end
|
151
108
|
|
109
|
+
|
152
110
|
annotations = repo["#{entity}"]
|
153
111
|
|
154
112
|
repo.close
|
155
113
|
|
156
|
-
|
114
|
+
|
115
|
+
return annotations if raw
|
116
|
+
|
117
|
+
annotations.unnamed = true
|
118
|
+
annotations.collect{|id, annotation|
|
119
|
+
Segment.load_tsv_values(text, annotation, annotations.fields)
|
120
|
+
}
|
157
121
|
end
|
158
|
-
|
122
|
+
EOC
|
159
123
|
end
|
160
124
|
|
161
125
|
def self.persist_in_global_tsv(entity, tsv = nil, fields = nil, doc_field = nil, entity_field = nil)
|
162
|
-
if not tsv.nil? and not tsv.respond_to?(:keys)
|
163
|
-
entity_field = doc_field if doc_field
|
164
|
-
doc_field = fields if fields
|
165
|
-
fields = tsv if tsv
|
166
|
-
tsv = nil
|
167
|
-
end
|
168
|
-
|
169
126
|
doc_field ||= "Document ID"
|
170
127
|
entity_field ||= "Entity Type"
|
171
128
|
|
@@ -174,34 +131,34 @@ class Document
|
|
174
131
|
if not fields.nil?
|
175
132
|
fields = [fields] if not Array === fields
|
176
133
|
fields = fields.collect{|f| f.to_s}
|
177
|
-
|
134
|
+
else
|
135
|
+
fields = nil
|
178
136
|
end
|
179
137
|
|
138
|
+
FIELDS_FOR_ENTITY_PERSISTENCE[entity.to_s] = fields
|
139
|
+
|
180
140
|
self.class_eval <<-EOC
|
181
|
-
def load_with_persistence_#{entity}
|
141
|
+
def load_with_persistence_#{entity}(raw = false)
|
182
142
|
fields = FIELDS_FOR_ENTITY_PERSISTENCE["#{ entity }"]
|
183
143
|
|
184
|
-
data = TSV_REPOS["#{ entity }"]
|
144
|
+
data = TSV_REPOS["#{ entity }"] || @global_persistence
|
145
|
+
|
146
|
+
data.read true
|
147
|
+
|
148
|
+
fields = data.fields if fields.nil? and data.respond_to? :fields
|
185
149
|
|
186
|
-
if data.nil?
|
187
|
-
data = global_persistence
|
188
|
-
end
|
189
150
|
|
190
151
|
data.filter
|
191
152
|
data.add_filter("field:#{ doc_field }", @docid)
|
192
153
|
data.add_filter("field:#{ entity_field }", "#{ entity }")
|
154
|
+
keys = data.keys
|
155
|
+
data.pop_filter
|
156
|
+
data.pop_filter
|
193
157
|
|
194
|
-
if
|
195
|
-
tsv = TSV.new({}, :list, :key => "ID", :fields => %w(Start End))
|
196
|
-
if fields.nil?
|
197
|
-
tsv.fields += ["Info"]
|
198
|
-
else
|
199
|
-
tsv.fields += fields
|
200
|
-
end
|
201
|
-
|
158
|
+
if keys.empty?
|
202
159
|
segments = produce_#{entity}
|
203
|
-
segments << Segment.
|
204
|
-
segments.
|
160
|
+
segments << Segment.setup("No #{entity} found in document #{ @docid }", -1) if segments.empty?
|
161
|
+
tsv = Segment.tsv(segments, *fields.reject{|f| ["#{doc_field}", "#{entity_field}", "Start", "End", "annotation_types"].include? f})
|
205
162
|
|
206
163
|
tsv.add_field "#{ doc_field }" do
|
207
164
|
@docid
|
@@ -211,16 +168,31 @@ class Document
|
|
211
168
|
"#{ entity }"
|
212
169
|
end
|
213
170
|
|
214
|
-
data.
|
215
|
-
data.
|
171
|
+
data.add_filter("field:#{ doc_field }", @docid)
|
172
|
+
data.add_filter("field:#{ entity_field }", "#{ entity }")
|
173
|
+
data.write true
|
174
|
+
keys = tsv.collect do |key, value|
|
175
|
+
data[key] = value
|
176
|
+
key
|
177
|
+
end
|
178
|
+
data.pop_filter
|
179
|
+
data.pop_filter
|
216
180
|
data.read
|
181
|
+
else
|
182
|
+
if raw == :check
|
183
|
+
data.close
|
184
|
+
return nil
|
185
|
+
end
|
217
186
|
end
|
218
187
|
|
219
|
-
|
220
|
-
data.each{|id, annotation| segments << Document.load_segment(text, annotation, fields) unless annotation[1].to_i == -1}
|
188
|
+
return data.values if raw
|
221
189
|
|
222
|
-
data.
|
223
|
-
data.
|
190
|
+
start_pos = data.identify_field "Start"
|
191
|
+
segments = data.values_at(*keys).collect{|annotation|
|
192
|
+
pos = annotation[start_pos]
|
193
|
+
Segment.load_tsv_values(text, annotation, data.fields) unless [-1, "-1", [-1], ["-1"]].include? pos
|
194
|
+
}.compact
|
195
|
+
data.close
|
224
196
|
|
225
197
|
segments
|
226
198
|
end
|
@@ -232,21 +204,21 @@ class Document
|
|
232
204
|
send :define_method, "produce_#{entity}", &block
|
233
205
|
|
234
206
|
self.class_eval <<-EOC
|
235
|
-
def load_#{entity}
|
236
|
-
return if
|
237
|
-
if self.respond_to?("load_with_persistence_#{entity}") and not @
|
238
|
-
|
207
|
+
def load_#{entity}(raw = false)
|
208
|
+
return if segments.include? "#{ entity }"
|
209
|
+
if self.respond_to?("load_with_persistence_#{entity}") and not @persist_dir.nil?
|
210
|
+
segments["#{entity}"] = load_with_persistence_#{entity}(raw)
|
239
211
|
else
|
240
|
-
|
212
|
+
segments["#{ entity }"] = produce_#{entity}
|
241
213
|
end
|
242
214
|
end
|
243
215
|
|
244
|
-
def #{entity}
|
216
|
+
def #{entity}(raw = false)
|
245
217
|
begin
|
246
|
-
entities =
|
218
|
+
entities = segments["#{ entity }"]
|
247
219
|
if entities.nil?
|
248
|
-
load_#{entity}
|
249
|
-
entities =
|
220
|
+
load_#{entity}(raw)
|
221
|
+
entities = segments["#{ entity }"]
|
250
222
|
end
|
251
223
|
end
|
252
224
|
|
@@ -254,34 +226,35 @@ class Document
|
|
254
226
|
end
|
255
227
|
|
256
228
|
def #{entity}_at(pos, persist = false)
|
257
|
-
segment_index("#{ entity }", persist ? File.join(@
|
229
|
+
segment_index("#{ entity }", persist ? File.join(@persist_dir, 'ranges') : nil)[pos]
|
258
230
|
end
|
259
231
|
|
260
232
|
EOC
|
261
233
|
end
|
262
234
|
|
263
|
-
def segment_index(name,
|
264
|
-
@segment_indeces[name] ||= Segment.index(self.send(name),
|
235
|
+
def segment_index(name, persist_dir = nil)
|
236
|
+
@segment_indeces[name] ||= Segment.index(self.send(name), persist_dir.nil? ? :memory : File.join(persist_dir, name + '.range'))
|
265
237
|
end
|
266
238
|
|
267
239
|
def load_into(segment, *annotations)
|
268
240
|
options = annotations.pop if Hash === annotations.last
|
269
241
|
options ||= {}
|
270
|
-
|
271
|
-
|
242
|
+
|
243
|
+
if options[:persist] and not @persist_dir.nil?
|
244
|
+
persist_dir = File.join(@persist_dir, 'ranges')
|
272
245
|
else
|
273
|
-
|
246
|
+
persist_dir = nil
|
274
247
|
end
|
275
248
|
|
276
|
-
segment
|
277
|
-
segment.annotations ||= {}
|
249
|
+
Segmented.setup(segment, {})
|
278
250
|
annotations.collect do |name|
|
279
251
|
name = name.to_s
|
280
|
-
|
281
|
-
|
252
|
+
index = segment_index(name, persist_dir)
|
253
|
+
annotations = index[segment.range]
|
254
|
+
segment.segments[name] = annotations
|
282
255
|
class << segment
|
283
256
|
self
|
284
|
-
end.class_eval "def #{ name }; @
|
257
|
+
end.class_eval "def #{ name }; @segments['#{ name }']; end"
|
285
258
|
end
|
286
259
|
|
287
260
|
segment
|
data/lib/rbbt/corpus/document_repo.rb
CHANGED
@@ -1,69 +1,89 @@
|
|
1
1
|
require 'rbbt/util/misc'
|
2
2
|
require 'tokyocabinet'
|
3
3
|
|
4
|
-
|
4
|
+
module DocumentRepo
|
5
5
|
class OpenError < StandardError;end
|
6
6
|
class KeyFormatError < StandardError;end
|
7
7
|
|
8
|
-
|
8
|
+
TC_CONNECTIONS = {}
|
9
|
+
def self.open_tokyocabinet(path, write)
|
10
|
+
write = true if not File.exists?(path)
|
11
|
+
flags = (write ? TokyoCabinet::BDB::OWRITER | TokyoCabinet::BDB::OCREAT : TokyoCabinet::BDB::OREADER)
|
9
12
|
|
10
|
-
|
13
|
+
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
11
14
|
|
12
|
-
|
13
|
-
|
14
|
-
end
|
15
|
+
database = TC_CONNECTIONS[path] ||= TokyoCabinet::BDB.new
|
16
|
+
database.close
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
d.write
|
20
|
-
else
|
21
|
-
d.read if d.write?
|
18
|
+
if !database.open(path, flags)
|
19
|
+
ecode = database.ecode
|
20
|
+
raise "Open error: #{database.errmsg(ecode)}. Trying to open file #{path}"
|
22
21
|
end
|
23
22
|
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
class << database
|
24
|
+
attr_accessor :writable, :persistence_path
|
25
|
+
|
26
|
+
def read
|
27
|
+
return if not @writable
|
28
|
+
self.close
|
29
|
+
if !self.open(@persistence_path, TokyoCabinet::BDB::OREADER)
|
30
|
+
ecode = self.ecode
|
31
|
+
raise "Open error: #{self.errmsg(ecode)}. Trying to open file #{@persistence_path}"
|
32
|
+
end
|
33
|
+
@writable = false
|
34
|
+
self
|
35
|
+
end
|
36
|
+
|
37
|
+
def write
|
38
|
+
return if @writable
|
39
|
+
self.close
|
40
|
+
if !self.open(@persistence_path, TokyoCabinet::BDB::OWRITER | TokyoCabinet::BDB::OCREAT)
|
41
|
+
ecode = self.ecode
|
42
|
+
raise "Open error: #{self.errmsg(ecode)}. Trying to open file #{@persistence_path}"
|
43
|
+
end
|
44
|
+
@writable = true
|
45
|
+
self
|
46
|
+
end
|
47
|
+
|
48
|
+
def write?
|
49
|
+
@writable
|
50
|
+
end
|
51
|
+
|
52
|
+
def collect
|
53
|
+
res = []
|
54
|
+
each do |key, value|
|
55
|
+
res << if block_given?
|
56
|
+
yield key, value
|
57
|
+
else
|
58
|
+
[key, value]
|
59
|
+
end
|
60
|
+
end
|
61
|
+
res
|
62
|
+
end
|
63
|
+
|
64
|
+
def delete(key)
|
65
|
+
out(key)
|
66
|
+
end
|
67
|
+
|
68
|
+
def values_at(*keys)
|
69
|
+
keys.collect do |key|
|
70
|
+
self[key]
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def merge!(hash)
|
75
|
+
hash.each do |key,values|
|
76
|
+
self[key] = values
|
77
|
+
end
|
78
|
+
end
|
27
79
|
|
28
|
-
alias original_open open
|
29
|
-
def open(write = false)
|
30
|
-
flags = (write ? TokyoCabinet::BDB::OWRITER | TokyoCabinet::BDB::OCREAT : TokyoCabinet::BDB::OREADER)
|
31
|
-
|
32
|
-
FileUtils.mkdir_p File.dirname(@path_to_db) unless File.exists?(File.dirname(@path_to_db))
|
33
|
-
if !self.original_open(@path_to_db, flags)
|
34
|
-
ecode = self.ecode
|
35
|
-
raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
|
36
80
|
end
|
37
81
|
|
38
|
-
|
39
|
-
|
40
|
-
end
|
41
|
-
|
42
|
-
def write?
|
43
|
-
@write
|
44
|
-
end
|
82
|
+
database.persistence_path ||= path
|
45
83
|
|
46
|
-
|
47
|
-
self.close
|
48
|
-
self.open(true)
|
49
|
-
end
|
50
|
-
|
51
|
-
def read
|
52
|
-
self.close
|
53
|
-
self.open(false)
|
54
|
-
end
|
84
|
+
database.extend DocumentRepo
|
55
85
|
|
56
|
-
|
57
|
-
super()
|
58
|
-
|
59
|
-
@path_to_db = path
|
60
|
-
|
61
|
-
if write || ! File.exists?(@path_to_db)
|
62
|
-
self.setcache(100000) or raise "Error setting cache"
|
63
|
-
self.open(true)
|
64
|
-
else
|
65
|
-
self.open(false)
|
66
|
-
end
|
86
|
+
database
|
67
87
|
end
|
68
88
|
|
69
89
|
def docid2fields(docid)
|
@@ -79,9 +99,10 @@ class DocumentRepo < TokyoCabinet::BDB
|
|
79
99
|
end
|
80
100
|
|
81
101
|
def add(text, namespace, id, type, hash)
|
82
|
-
write
|
102
|
+
write
|
83
103
|
docid = fields2docid(namespace, id, type, hash)
|
84
104
|
self[docid] = text unless self.include? docid
|
105
|
+
read
|
85
106
|
docid
|
86
107
|
end
|
87
108
|
|