rbbt-text 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/corpus/corpus.rb +15 -6
- data/lib/rbbt/corpus/document.rb +100 -127
- data/lib/rbbt/corpus/document_repo.rb +72 -51
- data/lib/rbbt/ner/NER.rb +4 -4
- data/lib/rbbt/ner/abner.rb +5 -4
- data/lib/rbbt/ner/banner.rb +3 -3
- data/lib/rbbt/ner/chemical_tagger.rb +3 -3
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +45 -10
- data/lib/rbbt/ner/oscar3.rb +3 -3
- data/lib/rbbt/ner/oscar4.rb +3 -3
- data/lib/rbbt/ner/patterns.rb +15 -13
- data/lib/rbbt/ner/regexpNER.rb +3 -2
- data/lib/rbbt/ner/rnorm.rb +2 -2
- data/lib/rbbt/ner/rnorm/cue_index.rb +2 -2
- data/lib/rbbt/ner/{annotations.rb → segment.rb} +161 -109
- data/lib/rbbt/ner/{annotations → segment}/named_entity.rb +3 -11
- data/lib/rbbt/ner/segment/relationship.rb +20 -0
- data/lib/rbbt/ner/segment/segmented.rb +13 -0
- data/lib/rbbt/ner/segment/token.rb +24 -0
- data/lib/rbbt/ner/{annotations → segment}/transformed.rb +10 -10
- data/lib/rbbt/ner/token_trieNER.rb +30 -22
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +2 -1
- data/lib/rbbt/nlp/nlp.rb +23 -37
- data/test/rbbt/corpus/test_document.rb +39 -37
- data/test/rbbt/ner/segment/test_named_entity.rb +29 -0
- data/test/rbbt/ner/segment/test_segmented.rb +23 -0
- data/test/rbbt/ner/{annotations → segment}/test_transformed.rb +6 -6
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +15 -1
- data/test/rbbt/ner/test_patterns.rb +11 -12
- data/test/rbbt/ner/test_regexpNER.rb +5 -4
- data/test/rbbt/ner/test_segment.rb +101 -0
- data/test/rbbt/ner/test_token_trieNER.rb +8 -9
- data/test/test_helper.rb +6 -6
- metadata +40 -22
- data/lib/rbbt/ner/annotations/annotated.rb +0 -15
- data/lib/rbbt/ner/annotations/relations.rb +0 -25
- data/lib/rbbt/ner/annotations/token.rb +0 -28
- data/test/rbbt/ner/annotations/test_named_entity.rb +0 -14
- data/test/rbbt/ner/test_annotations.rb +0 -70
data/lib/rbbt/corpus/corpus.rb
CHANGED
@@ -7,17 +7,26 @@ class Corpus
|
|
7
7
|
@corpora_path = case
|
8
8
|
when corpora_path.nil?
|
9
9
|
Rbbt.corpora
|
10
|
-
when (not
|
11
|
-
|
10
|
+
when (not Path === corpora_path)
|
11
|
+
Path.setup(corpora_path)
|
12
12
|
else
|
13
13
|
corpora_path
|
14
14
|
end
|
15
15
|
|
16
|
-
@
|
16
|
+
@corpora_path = @corpora_path.find
|
17
17
|
@persistence_dir = File.join(@corpora_path, "annotations")
|
18
|
-
|
19
|
-
@
|
20
|
-
|
18
|
+
|
19
|
+
Misc.lock(@persistence_dir) do
|
20
|
+
@global_annotations = TSV.setup(Persist.open_tokyocabinet(File.join(@persistence_dir, "global_annotations"), false, :list), :key => "ID", :fields => ["Start", "End", "JSON", "Document ID", "Entity Type"])
|
21
|
+
@global_annotations.unnamed = true
|
22
|
+
@global_annotations.close
|
23
|
+
end
|
24
|
+
|
25
|
+
Misc.lock(@corpora_path.document_repo) do
|
26
|
+
@document_repo = DocumentRepo.open_tokyocabinet @corpora_path.document_repo, false
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
21
30
|
|
22
31
|
def persistence_for(docid)
|
23
32
|
File.join(persistence_dir, docid)
|
data/lib/rbbt/corpus/document.rb
CHANGED
@@ -1,19 +1,21 @@
|
|
1
|
-
require 'rbbt/ner/
|
2
|
-
require 'rbbt/
|
3
|
-
require 'rbbt/
|
1
|
+
require 'rbbt/ner/segment'
|
2
|
+
require 'rbbt/ner/segment/segmented'
|
3
|
+
require 'rbbt/tsv'
|
4
|
+
require 'rbbt/resource/path'
|
5
|
+
require 'rbbt/persist/tsv'
|
4
6
|
require 'rbbt/util/misc'
|
5
7
|
require 'json'
|
6
8
|
|
7
9
|
class Document
|
8
10
|
|
9
|
-
attr_accessor :text, :docid, :namespace, :id, :type, :hash, :
|
10
|
-
def initialize(
|
11
|
-
@
|
11
|
+
attr_accessor :text, :docid, :namespace, :id, :type, :hash, :segments, :segment_indeces, :persist_dir, :global_persistence
|
12
|
+
def initialize(persist_dir = nil, docid = nil, text = nil, global_persistence = nil)
|
13
|
+
@segments = {}
|
12
14
|
@segment_indeces = {}
|
13
15
|
|
14
|
-
if not
|
15
|
-
@
|
16
|
-
@
|
16
|
+
if not persist_dir.nil?
|
17
|
+
@persist_dir = persist_dir
|
18
|
+
@persist_dir = Path.setup(@persist_dir) if not Path == @persist_dir
|
17
19
|
end
|
18
20
|
|
19
21
|
@global_persistence = global_persistence
|
@@ -34,45 +36,6 @@ class Document
|
|
34
36
|
update_docid
|
35
37
|
end
|
36
38
|
|
37
|
-
def self.save_segment(segment, fields = nil)
|
38
|
-
if fields.nil?
|
39
|
-
eend = case segment.offset; when nil; nil; when -1; -1; else segment.end; end
|
40
|
-
[segment.offset, eend, segment.info.to_json]
|
41
|
-
else
|
42
|
-
eend = case segment.offset; when nil; nil; when -1; -1; else segment.end; end
|
43
|
-
info = segment.info
|
44
|
-
info["literal"] = segment.to_s.gsub(/\s/,' ')
|
45
|
-
info.extend IndiferentHash
|
46
|
-
[segment.offset, eend].concat info.values_at(*fields.collect{|f| f.downcase}).collect{|v| Array === v ? v * "|" : v}
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def self.load_segment(text, annotation, fields = nil)
|
51
|
-
if fields.nil?
|
52
|
-
start, eend, info = annotation.values_at 0,1,2
|
53
|
-
info = JSON.parse(info)
|
54
|
-
else
|
55
|
-
start, eend = annotation.values_at 0,1
|
56
|
-
info = Misc.process_to_hash(fields) do |fields| annotation.values_at(*fields.collect{|f| f.downcase}).collect{|v| v.index("|").nil? ? v : v.split("|")} end
|
57
|
-
end
|
58
|
-
|
59
|
-
Segment.load(text, start, eend, info, @docid)
|
60
|
-
end
|
61
|
-
|
62
|
-
def self.tsv(segments, fields = nil)
|
63
|
-
tsv = TSV.new({}, :list, :key => "ID", :fields => %w(Start End))
|
64
|
-
if fields.nil?
|
65
|
-
tsv.fields += ["Info"]
|
66
|
-
else
|
67
|
-
tsv.fields += fields
|
68
|
-
end
|
69
|
-
|
70
|
-
segments.each{|segment| tsv[segment.id] = Document.save_segment(segment, fields) unless segment.offset.nil?}
|
71
|
-
|
72
|
-
tsv
|
73
|
-
end
|
74
|
-
|
75
|
-
|
76
39
|
#{{{ PERSISTENCE
|
77
40
|
|
78
41
|
TSV_REPOS = {}
|
@@ -82,30 +45,28 @@ class Document
|
|
82
45
|
if not fields.nil?
|
83
46
|
fields = [fields] if not Array === fields
|
84
47
|
fields = fields.collect{|f| f.to_s}
|
85
|
-
FIELDS_FOR_ENTITY_PERSISTENCE[entity.to_s] = fields
|
48
|
+
FIELDS_FOR_ENTITY_PERSISTENCE[entity.to_s] = fields
|
86
49
|
end
|
87
50
|
|
88
51
|
self.class_eval <<-EOC
|
89
|
-
def load_with_persistence_#{entity}
|
52
|
+
def load_with_persistence_#{entity}(raw = false)
|
90
53
|
fields = FIELDS_FOR_ENTITY_PERSISTENCE["#{ entity }"]
|
91
54
|
|
92
|
-
|
93
|
-
:persistence_file => File.join(@persistence_dir, "#{ entity }")) do
|
55
|
+
tsv_file = File.join(@persist_dir.find, "#{ entity }")
|
94
56
|
|
95
|
-
|
96
|
-
if fields.nil?
|
97
|
-
tsv.fields += ["Info"]
|
98
|
-
else
|
99
|
-
tsv.fields += fields
|
100
|
-
end
|
57
|
+
return nil if raw == :check and File.exists? tsv_file
|
101
58
|
|
59
|
+
annotations = Persist.persist("Entity[#{ entity }]", :tsv, :file => tsv_file) do
|
102
60
|
segments = produce_#{entity}
|
103
|
-
|
104
|
-
|
105
|
-
tsv
|
61
|
+
tsv = Segment.tsv(segments, fields)
|
106
62
|
end
|
107
63
|
|
108
|
-
annotations
|
64
|
+
return annotations if raw
|
65
|
+
|
66
|
+
annotations.unnamed = true
|
67
|
+
annotations.collect{|id, annotation|
|
68
|
+
Segment.load_tsv_values(text, annotation, annotations.fields)
|
69
|
+
}
|
109
70
|
end
|
110
71
|
EOC
|
111
72
|
end
|
@@ -125,47 +86,43 @@ class Document
|
|
125
86
|
end
|
126
87
|
|
127
88
|
self.class_eval <<-EOC
|
128
|
-
def load_with_persistence_#{entity}
|
89
|
+
def load_with_persistence_#{entity}(raw = false)
|
129
90
|
repo = TSV_REPOS["#{ entity }"]
|
130
91
|
if repo.nil?
|
131
|
-
raise "No persistence file or
|
132
|
-
repo =
|
92
|
+
raise "No persistence file or persistence dir for persist_in_tsv" if persist_dir.nil?
|
93
|
+
repo = Persist.open_tokyocabinet(persist_dir.annotations_by_type.find, true, :marshal_tsv)
|
133
94
|
end
|
134
95
|
|
135
|
-
|
136
96
|
fields = FIELDS_FOR_ENTITY_PERSISTENCE["#{ entity }"]
|
137
|
-
|
138
97
|
if not repo.include? "#{ entity }"
|
139
|
-
|
140
|
-
if fields.nil?
|
141
|
-
tsv.fields += ["Info"]
|
142
|
-
else
|
143
|
-
tsv.fields += fields
|
144
|
-
end
|
145
|
-
|
146
|
-
produce_#{entity}.each{|segment| tsv[segment.id] = Document.save_segment(segment, fields) unless segment.offset.nil?}
|
98
|
+
segments = produce_#{entity}
|
147
99
|
repo.write
|
148
|
-
repo["#{entity}"] = tsv
|
100
|
+
repo["#{entity}"] = Segment.tsv(segments, fields)
|
149
101
|
repo.read
|
102
|
+
else
|
103
|
+
if raw == :check
|
104
|
+
repo.close
|
105
|
+
return nil
|
106
|
+
end
|
150
107
|
end
|
151
108
|
|
109
|
+
|
152
110
|
annotations = repo["#{entity}"]
|
153
111
|
|
154
112
|
repo.close
|
155
113
|
|
156
|
-
|
114
|
+
|
115
|
+
return annotations if raw
|
116
|
+
|
117
|
+
annotations.unnamed = true
|
118
|
+
annotations.collect{|id, annotation|
|
119
|
+
Segment.load_tsv_values(text, annotation, annotations.fields)
|
120
|
+
}
|
157
121
|
end
|
158
|
-
|
122
|
+
EOC
|
159
123
|
end
|
160
124
|
|
161
125
|
def self.persist_in_global_tsv(entity, tsv = nil, fields = nil, doc_field = nil, entity_field = nil)
|
162
|
-
if not tsv.nil? and not tsv.respond_to?(:keys)
|
163
|
-
entity_field = doc_field if doc_field
|
164
|
-
doc_field = fields if fields
|
165
|
-
fields = tsv if tsv
|
166
|
-
tsv = nil
|
167
|
-
end
|
168
|
-
|
169
126
|
doc_field ||= "Document ID"
|
170
127
|
entity_field ||= "Entity Type"
|
171
128
|
|
@@ -174,34 +131,34 @@ class Document
|
|
174
131
|
if not fields.nil?
|
175
132
|
fields = [fields] if not Array === fields
|
176
133
|
fields = fields.collect{|f| f.to_s}
|
177
|
-
|
134
|
+
else
|
135
|
+
fields = nil
|
178
136
|
end
|
179
137
|
|
138
|
+
FIELDS_FOR_ENTITY_PERSISTENCE[entity.to_s] = fields
|
139
|
+
|
180
140
|
self.class_eval <<-EOC
|
181
|
-
def load_with_persistence_#{entity}
|
141
|
+
def load_with_persistence_#{entity}(raw = false)
|
182
142
|
fields = FIELDS_FOR_ENTITY_PERSISTENCE["#{ entity }"]
|
183
143
|
|
184
|
-
data = TSV_REPOS["#{ entity }"]
|
144
|
+
data = TSV_REPOS["#{ entity }"] || @global_persistence
|
145
|
+
|
146
|
+
data.read true
|
147
|
+
|
148
|
+
fields = data.fields if fields.nil? and data.respond_to? :fields
|
185
149
|
|
186
|
-
if data.nil?
|
187
|
-
data = global_persistence
|
188
|
-
end
|
189
150
|
|
190
151
|
data.filter
|
191
152
|
data.add_filter("field:#{ doc_field }", @docid)
|
192
153
|
data.add_filter("field:#{ entity_field }", "#{ entity }")
|
154
|
+
keys = data.keys
|
155
|
+
data.pop_filter
|
156
|
+
data.pop_filter
|
193
157
|
|
194
|
-
if
|
195
|
-
tsv = TSV.new({}, :list, :key => "ID", :fields => %w(Start End))
|
196
|
-
if fields.nil?
|
197
|
-
tsv.fields += ["Info"]
|
198
|
-
else
|
199
|
-
tsv.fields += fields
|
200
|
-
end
|
201
|
-
|
158
|
+
if keys.empty?
|
202
159
|
segments = produce_#{entity}
|
203
|
-
segments << Segment.
|
204
|
-
segments.
|
160
|
+
segments << Segment.setup("No #{entity} found in document #{ @docid }", -1) if segments.empty?
|
161
|
+
tsv = Segment.tsv(segments, *fields.reject{|f| ["#{doc_field}", "#{entity_field}", "Start", "End", "annotation_types"].include? f})
|
205
162
|
|
206
163
|
tsv.add_field "#{ doc_field }" do
|
207
164
|
@docid
|
@@ -211,16 +168,31 @@ class Document
|
|
211
168
|
"#{ entity }"
|
212
169
|
end
|
213
170
|
|
214
|
-
data.
|
215
|
-
data.
|
171
|
+
data.add_filter("field:#{ doc_field }", @docid)
|
172
|
+
data.add_filter("field:#{ entity_field }", "#{ entity }")
|
173
|
+
data.write true
|
174
|
+
keys = tsv.collect do |key, value|
|
175
|
+
data[key] = value
|
176
|
+
key
|
177
|
+
end
|
178
|
+
data.pop_filter
|
179
|
+
data.pop_filter
|
216
180
|
data.read
|
181
|
+
else
|
182
|
+
if raw == :check
|
183
|
+
data.close
|
184
|
+
return nil
|
185
|
+
end
|
217
186
|
end
|
218
187
|
|
219
|
-
|
220
|
-
data.each{|id, annotation| segments << Document.load_segment(text, annotation, fields) unless annotation[1].to_i == -1}
|
188
|
+
return data.values if raw
|
221
189
|
|
222
|
-
data.
|
223
|
-
data.
|
190
|
+
start_pos = data.identify_field "Start"
|
191
|
+
segments = data.values_at(*keys).collect{|annotation|
|
192
|
+
pos = annotation[start_pos]
|
193
|
+
Segment.load_tsv_values(text, annotation, data.fields) unless [-1, "-1", [-1], ["-1"]].include? pos
|
194
|
+
}.compact
|
195
|
+
data.close
|
224
196
|
|
225
197
|
segments
|
226
198
|
end
|
@@ -232,21 +204,21 @@ class Document
|
|
232
204
|
send :define_method, "produce_#{entity}", &block
|
233
205
|
|
234
206
|
self.class_eval <<-EOC
|
235
|
-
def load_#{entity}
|
236
|
-
return if
|
237
|
-
if self.respond_to?("load_with_persistence_#{entity}") and not @
|
238
|
-
|
207
|
+
def load_#{entity}(raw = false)
|
208
|
+
return if segments.include? "#{ entity }"
|
209
|
+
if self.respond_to?("load_with_persistence_#{entity}") and not @persist_dir.nil?
|
210
|
+
segments["#{entity}"] = load_with_persistence_#{entity}(raw)
|
239
211
|
else
|
240
|
-
|
212
|
+
segments["#{ entity }"] = produce_#{entity}
|
241
213
|
end
|
242
214
|
end
|
243
215
|
|
244
|
-
def #{entity}
|
216
|
+
def #{entity}(raw = false)
|
245
217
|
begin
|
246
|
-
entities =
|
218
|
+
entities = segments["#{ entity }"]
|
247
219
|
if entities.nil?
|
248
|
-
load_#{entity}
|
249
|
-
entities =
|
220
|
+
load_#{entity}(raw)
|
221
|
+
entities = segments["#{ entity }"]
|
250
222
|
end
|
251
223
|
end
|
252
224
|
|
@@ -254,34 +226,35 @@ class Document
|
|
254
226
|
end
|
255
227
|
|
256
228
|
def #{entity}_at(pos, persist = false)
|
257
|
-
segment_index("#{ entity }", persist ? File.join(@
|
229
|
+
segment_index("#{ entity }", persist ? File.join(@persist_dir, 'ranges') : nil)[pos]
|
258
230
|
end
|
259
231
|
|
260
232
|
EOC
|
261
233
|
end
|
262
234
|
|
263
|
-
def segment_index(name,
|
264
|
-
@segment_indeces[name] ||= Segment.index(self.send(name),
|
235
|
+
def segment_index(name, persist_dir = nil)
|
236
|
+
@segment_indeces[name] ||= Segment.index(self.send(name), persist_dir.nil? ? :memory : File.join(persist_dir, name + '.range'))
|
265
237
|
end
|
266
238
|
|
267
239
|
def load_into(segment, *annotations)
|
268
240
|
options = annotations.pop if Hash === annotations.last
|
269
241
|
options ||= {}
|
270
|
-
|
271
|
-
|
242
|
+
|
243
|
+
if options[:persist] and not @persist_dir.nil?
|
244
|
+
persist_dir = File.join(@persist_dir, 'ranges')
|
272
245
|
else
|
273
|
-
|
246
|
+
persist_dir = nil
|
274
247
|
end
|
275
248
|
|
276
|
-
segment
|
277
|
-
segment.annotations ||= {}
|
249
|
+
Segmented.setup(segment, {})
|
278
250
|
annotations.collect do |name|
|
279
251
|
name = name.to_s
|
280
|
-
|
281
|
-
|
252
|
+
index = segment_index(name, persist_dir)
|
253
|
+
annotations = index[segment.range]
|
254
|
+
segment.segments[name] = annotations
|
282
255
|
class << segment
|
283
256
|
self
|
284
|
-
end.class_eval "def #{ name }; @
|
257
|
+
end.class_eval "def #{ name }; @segments['#{ name }']; end"
|
285
258
|
end
|
286
259
|
|
287
260
|
segment
|
@@ -1,69 +1,89 @@
|
|
1
1
|
require 'rbbt/util/misc'
|
2
2
|
require 'tokyocabinet'
|
3
3
|
|
4
|
-
|
4
|
+
module DocumentRepo
|
5
5
|
class OpenError < StandardError;end
|
6
6
|
class KeyFormatError < StandardError;end
|
7
7
|
|
8
|
-
|
8
|
+
TC_CONNECTIONS = {}
|
9
|
+
def self.open_tokyocabinet(path, write)
|
10
|
+
write = true if not File.exists?(path)
|
11
|
+
flags = (write ? TokyoCabinet::BDB::OWRITER | TokyoCabinet::BDB::OCREAT : TokyoCabinet::BDB::OREADER)
|
9
12
|
|
10
|
-
|
13
|
+
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
11
14
|
|
12
|
-
|
13
|
-
|
14
|
-
end
|
15
|
+
database = TC_CONNECTIONS[path] ||= TokyoCabinet::BDB.new
|
16
|
+
database.close
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
d.write
|
20
|
-
else
|
21
|
-
d.read if d.write?
|
18
|
+
if !database.open(path, flags)
|
19
|
+
ecode = database.ecode
|
20
|
+
raise "Open error: #{database.errmsg(ecode)}. Trying to open file #{path}"
|
22
21
|
end
|
23
22
|
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
class << database
|
24
|
+
attr_accessor :writable, :persistence_path
|
25
|
+
|
26
|
+
def read
|
27
|
+
return if not @writable
|
28
|
+
self.close
|
29
|
+
if !self.open(@persistence_path, TokyoCabinet::BDB::OREADER)
|
30
|
+
ecode = self.ecode
|
31
|
+
raise "Open error: #{self.errmsg(ecode)}. Trying to open file #{@persistence_path}"
|
32
|
+
end
|
33
|
+
@writable = false
|
34
|
+
self
|
35
|
+
end
|
36
|
+
|
37
|
+
def write
|
38
|
+
return if @writable
|
39
|
+
self.close
|
40
|
+
if !self.open(@persistence_path, TokyoCabinet::BDB::OWRITER | TokyoCabinet::BDB::OCREAT)
|
41
|
+
ecode = self.ecode
|
42
|
+
raise "Open error: #{self.errmsg(ecode)}. Trying to open file #{@persistence_path}"
|
43
|
+
end
|
44
|
+
@writable = true
|
45
|
+
self
|
46
|
+
end
|
47
|
+
|
48
|
+
def write?
|
49
|
+
@writable
|
50
|
+
end
|
51
|
+
|
52
|
+
def collect
|
53
|
+
res = []
|
54
|
+
each do |key, value|
|
55
|
+
res << if block_given?
|
56
|
+
yield key, value
|
57
|
+
else
|
58
|
+
[key, value]
|
59
|
+
end
|
60
|
+
end
|
61
|
+
res
|
62
|
+
end
|
63
|
+
|
64
|
+
def delete(key)
|
65
|
+
out(key)
|
66
|
+
end
|
67
|
+
|
68
|
+
def values_at(*keys)
|
69
|
+
keys.collect do |key|
|
70
|
+
self[key]
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def merge!(hash)
|
75
|
+
hash.each do |key,values|
|
76
|
+
self[key] = values
|
77
|
+
end
|
78
|
+
end
|
27
79
|
|
28
|
-
alias original_open open
|
29
|
-
def open(write = false)
|
30
|
-
flags = (write ? TokyoCabinet::BDB::OWRITER | TokyoCabinet::BDB::OCREAT : TokyoCabinet::BDB::OREADER)
|
31
|
-
|
32
|
-
FileUtils.mkdir_p File.dirname(@path_to_db) unless File.exists?(File.dirname(@path_to_db))
|
33
|
-
if !self.original_open(@path_to_db, flags)
|
34
|
-
ecode = self.ecode
|
35
|
-
raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
|
36
80
|
end
|
37
81
|
|
38
|
-
|
39
|
-
|
40
|
-
end
|
41
|
-
|
42
|
-
def write?
|
43
|
-
@write
|
44
|
-
end
|
82
|
+
database.persistence_path ||= path
|
45
83
|
|
46
|
-
|
47
|
-
self.close
|
48
|
-
self.open(true)
|
49
|
-
end
|
50
|
-
|
51
|
-
def read
|
52
|
-
self.close
|
53
|
-
self.open(false)
|
54
|
-
end
|
84
|
+
database.extend DocumentRepo
|
55
85
|
|
56
|
-
|
57
|
-
super()
|
58
|
-
|
59
|
-
@path_to_db = path
|
60
|
-
|
61
|
-
if write || ! File.exists?(@path_to_db)
|
62
|
-
self.setcache(100000) or raise "Error setting cache"
|
63
|
-
self.open(true)
|
64
|
-
else
|
65
|
-
self.open(false)
|
66
|
-
end
|
86
|
+
database
|
67
87
|
end
|
68
88
|
|
69
89
|
def docid2fields(docid)
|
@@ -79,9 +99,10 @@ class DocumentRepo < TokyoCabinet::BDB
|
|
79
99
|
end
|
80
100
|
|
81
101
|
def add(text, namespace, id, type, hash)
|
82
|
-
write
|
102
|
+
write
|
83
103
|
docid = fields2docid(namespace, id, type, hash)
|
84
104
|
self[docid] = text unless self.include? docid
|
105
|
+
read
|
85
106
|
docid
|
86
107
|
end
|
87
108
|
|