rbbt-text 1.1.9 → 1.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/bow/bow.rb +5 -2
  3. data/lib/rbbt/bow/dictionary.rb +27 -23
  4. data/lib/rbbt/document.rb +56 -0
  5. data/lib/rbbt/document/annotation.rb +45 -0
  6. data/lib/rbbt/document/corpus.rb +61 -0
  7. data/lib/rbbt/document/corpus/pubmed.rb +33 -0
  8. data/lib/rbbt/ner/NER.rb +3 -3
  9. data/lib/rbbt/ner/abner.rb +1 -1
  10. data/lib/rbbt/ner/banner.rb +1 -1
  11. data/lib/rbbt/ner/brat.rb +1 -1
  12. data/lib/rbbt/ner/chemical_tagger.rb +1 -2
  13. data/lib/rbbt/ner/g_norm_plus.rb +42 -12
  14. data/lib/rbbt/ner/linnaeus.rb +3 -3
  15. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
  16. data/lib/rbbt/ner/oscar3.rb +1 -2
  17. data/lib/rbbt/ner/oscar4.rb +3 -3
  18. data/lib/rbbt/ner/patterns.rb +5 -5
  19. data/lib/rbbt/ner/regexpNER.rb +1 -2
  20. data/lib/rbbt/ner/token_trieNER.rb +35 -22
  21. data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
  22. data/lib/rbbt/nlp/nlp.rb +5 -5
  23. data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
  24. data/lib/rbbt/nlp/spaCy.rb +52 -0
  25. data/lib/rbbt/segment.rb +179 -0
  26. data/lib/rbbt/segment/annotation.rb +58 -0
  27. data/lib/rbbt/segment/encoding.rb +18 -0
  28. data/lib/rbbt/{text/segment → segment}/named_entity.rb +11 -10
  29. data/lib/rbbt/segment/overlaps.rb +63 -0
  30. data/lib/rbbt/segment/range_index.rb +35 -0
  31. data/lib/rbbt/segment/relationship.rb +7 -0
  32. data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
  33. data/lib/rbbt/segment/token.rb +23 -0
  34. data/lib/rbbt/{text/segment → segment}/transformed.rb +10 -8
  35. data/lib/rbbt/segment/tsv.rb +41 -0
  36. data/share/install/software/Linnaeus +1 -1
  37. data/share/install/software/OpenNLP +1 -1
  38. data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
  39. data/test/rbbt/document/test_annotation.rb +140 -0
  40. data/test/rbbt/document/test_corpus.rb +33 -0
  41. data/test/rbbt/ner/test_finder.rb +3 -3
  42. data/test/rbbt/ner/test_g_norm_plus.rb +20 -3
  43. data/test/rbbt/ner/test_patterns.rb +9 -9
  44. data/test/rbbt/ner/test_regexpNER.rb +14 -14
  45. data/test/rbbt/ner/test_rnorm.rb +3 -4
  46. data/test/rbbt/ner/test_token_trieNER.rb +1 -0
  47. data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
  48. data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
  49. data/test/rbbt/segment/test_annotation.rb +39 -0
  50. data/test/rbbt/segment/test_corpus.rb +36 -0
  51. data/test/rbbt/segment/test_encoding.rb +24 -0
  52. data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
  53. data/test/rbbt/segment/test_overlaps.rb +69 -0
  54. data/test/rbbt/segment/test_range_index.rb +42 -0
  55. data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
  56. data/test/rbbt/test_document.rb +14 -0
  57. data/test/rbbt/test_segment.rb +182 -0
  58. data/test/test_helper.rb +5 -3
  59. data/test/test_spaCy.rb +32 -0
  60. metadata +44 -32
  61. data/lib/rbbt/text/corpus.rb +0 -106
  62. data/lib/rbbt/text/corpus/document.rb +0 -361
  63. data/lib/rbbt/text/corpus/document_repo.rb +0 -68
  64. data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
  65. data/lib/rbbt/text/document.rb +0 -39
  66. data/lib/rbbt/text/segment.rb +0 -355
  67. data/lib/rbbt/text/segment/docid.rb +0 -46
  68. data/lib/rbbt/text/segment/relationship.rb +0 -24
  69. data/lib/rbbt/text/segment/token.rb +0 -49
  70. data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
  71. data/test/rbbt/text/corpus/test_document.rb +0 -52
  72. data/test/rbbt/text/segment/test_relationship.rb +0 -0
  73. data/test/rbbt/text/segment/test_segmented.rb +0 -23
  74. data/test/rbbt/text/test_corpus.rb +0 -34
  75. data/test/rbbt/text/test_document.rb +0 -58
  76. data/test/rbbt/text/test_segment.rb +0 -100
data/test/rbbt/test_document.rb ADDED
@@ -0,0 +1,14 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+
4
+ class TestDocument < Test::Unit::TestCase
5
+
6
+ def test_docid
7
+ text = "This is a document"
8
+ Document.setup(text, "TEST", "test_doc1", nil)
9
+
10
+ assert_equal ["TEST", "test_doc1", nil, Misc.digest(text)] * ":", text.docid
11
+ end
12
+
13
+ end
14
+
data/test/rbbt/test_segment.rb ADDED
@@ -0,0 +1,182 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+
6
+ class TestSegment < Test::Unit::TestCase
7
+ def test_segment
8
+ text = "This is a document"
9
+ Document.setup(text, "TEST", "test_doc1", nil)
10
+
11
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
12
+
13
+ assert_equal text.docid + ":" + segment.offset.to_s + ".." + segment.eend.to_s, segment.segid
14
+ end
15
+
16
+ def test_segid
17
+ text = "This is a document"
18
+ Document.setup(text, "TEST", "test_doc1", nil)
19
+
20
+ corpus = Document::Corpus.setup({})
21
+
22
+ corpus.add_document(text)
23
+
24
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
25
+
26
+ segid = segment.segid(corpus)
27
+
28
+ segment = segid.segment
29
+ assert_equal "is", segment
30
+ end
31
+
32
+ def test_info
33
+ segment = "test"
34
+ segment.extend Segment
35
+ segment.offset = 10
36
+ assert segment.info.include? :offset
37
+ end
38
+
39
+ def test_sort
40
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
41
+ Document.setup(text, "TEST", "test_doc1", nil)
42
+
43
+ corpus = Document::Corpus.setup({})
44
+
45
+ corpus.add_document(text)
46
+
47
+ gene1 = "TP53"
48
+ gene1.extend Segment
49
+ gene1.offset = text.index gene1
50
+ gene1.docid = text.docid
51
+
52
+ gene2 = "CDK5R1"
53
+ gene2.extend Segment
54
+ gene2.offset = text.index gene2
55
+ gene2.docid = text.docid
56
+
57
+ assert_equal [gene1,gene2], Segment.sort([gene2,gene1])
58
+
59
+ assert_equal [gene1,gene2], Segment.sort([gene2.segid(corpus),gene1.segid(corpus)]).collect{|segid| segid.segment}
60
+ end
61
+
62
+ def test_clean_sort
63
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
64
+ Document.setup(text, "TEST", "test_doc1", nil)
65
+
66
+ corpus = Document::Corpus.setup({})
67
+
68
+ corpus.add_document(text)
69
+
70
+ gene1 = "TP53"
71
+ gene1.extend Segment
72
+ gene1.offset = text.index gene1
73
+ gene1.docid = text.docid
74
+
75
+ gene2 = "CDK5R1"
76
+ gene2.extend Segment
77
+ gene2.offset = text.index gene2
78
+ gene2.docid = text.docid
79
+
80
+ gene3 = "TP53 gene"
81
+ gene3.extend Segment
82
+ gene3.offset = text.index gene1
83
+ gene3.docid = text.docid
84
+
85
+ assert_equal [gene1,gene2], Segment.sort([gene2,gene1])
86
+
87
+ assert_equal [gene3,gene2], Segment.clean_sort([gene2,gene1,gene3])
88
+ end
89
+
90
+ def test_split
91
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
92
+ Document.setup(text, "TEST", "test_doc1", nil)
93
+
94
+ corpus = Document::Corpus.setup({})
95
+
96
+ corpus.add_document(text)
97
+
98
+ gene1 = "TP53"
99
+ gene1.extend Segment
100
+ gene1.offset = text.index gene1
101
+ gene1.docid = text.docid
102
+
103
+ gene2 = "CDK5R1"
104
+ gene2.extend Segment
105
+ gene2.offset = text.index gene2
106
+ gene2.docid = text.docid
107
+
108
+ gene3 = "TP53 gene"
109
+ gene3.extend Segment
110
+ gene3.offset = text.index gene1
111
+ gene3.docid = text.docid
112
+
113
+ assert_equal ["This sentence mentions the ", gene3, " and the ", gene2, " protein"], Segment.split(text, [gene2, gene1, gene3])
114
+
115
+ assert_equal ["This sentence mentions the ", gene3, " and the ", gene2, " protein"], Segment.split(text, [gene2, gene1, gene3].collect{|s| s.segid})
116
+ end
117
+
118
+
119
+ def test_align
120
+ text =<<-EOF
121
+ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of early childhood poorly responding to therapy.
122
+ EOF
123
+
124
+ parts = text.split(/\W/)
125
+ Segment.align(text, parts)
126
+
127
+ assert_equal "Atypical teratoid/".length, parts.select{|s| s == "rhabdoid"}.first.offset
128
+
129
+ Document.setup(text, "TEST", "test_doc1", nil)
130
+
131
+ parts = text.split(/\W/)
132
+ Segment.align(text, parts)
133
+
134
+ assert_equal parts.first.docid, text.docid
135
+ end
136
+
137
+ def test_segment_index
138
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
139
+ Document.setup(text, "TEST", "test_doc1", nil)
140
+
141
+ corpus = Document::Corpus.setup({})
142
+
143
+ corpus.add_document(text)
144
+
145
+ gene1 = "TP53"
146
+ gene1.extend Segment
147
+ gene1.offset = text.index gene1
148
+ gene1.docid = text.docid
149
+
150
+ gene2 = "CDK5R1"
151
+ gene2.extend Segment
152
+ gene2.offset = text.index gene2
153
+ gene2.docid = text.docid
154
+
155
+ gene3 = "TP53 gene"
156
+ gene3.extend Segment
157
+ gene3.offset = text.index gene1
158
+ gene3.docid = text.docid
159
+
160
+ index = Segment.index([gene1, gene2, gene3], corpus)
161
+ assert_equal "CDK5R1", index[gene2.offset + 1].segment.first
162
+
163
+ TmpFile.with_file do |fwt|
164
+ index = Segment.index([gene1, gene2, gene3], corpus, fwt)
165
+ assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
166
+ index = Segment.index([gene1, gene2, gene3], corpus, fwt)
167
+ assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
168
+ end
169
+
170
+ index = Segment.index([gene1, gene2, gene3].collect{|s| s.segid}, corpus)
171
+ assert_equal "CDK5R1", index[gene2.offset + 1].segment.first
172
+
173
+ TmpFile.with_file do |fwt|
174
+ index = Segment.index([gene1, gene2, gene3].collect{|s| s.segid}, corpus, fwt)
175
+ assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
176
+ index = Segment.index([gene1, gene2, gene3].collect{|s| s.segid}, corpus, fwt)
177
+ assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
178
+ end
179
+ end
180
+
181
+ end
182
+
data/test/test_helper.rb CHANGED
@@ -6,7 +6,7 @@ require 'rbbt'
6
6
  require 'rbbt/persist'
7
7
  require 'rbbt/util/tmpfile'
8
8
  require 'rbbt/util/log'
9
- require 'rbbt/text/corpus'
9
+ #require 'rbbt/text/corpus'
10
10
 
11
11
  class Test::Unit::TestCase
12
12
  def get_test_datafile(file)
@@ -22,8 +22,10 @@ class Test::Unit::TestCase
22
22
  FileUtils.rm_rf Rbbt.tmp.test.find :user
23
23
  Persist::CONNECTIONS.values.each do |c| c.close end
24
24
  Persist::CONNECTIONS.clear
25
- Corpus::DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
26
- Corpus::DocumentRepo::TC_CONNECTIONS.clear
25
+ if defined? Corpus
26
+ Corpus::DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
27
+ Corpus::DocumentRepo::TC_CONNECTIONS.clear
28
+ end
27
29
  end
28
30
 
29
31
  end
data/test/test_spaCy.rb ADDED
@@ -0,0 +1,32 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '', 'test_helper.rb')
2
+ require 'rbbt/nlp/spaCy'
3
+ require 'rbbt/document/corpus'
4
+
5
+ class TestSpaCy < Test::Unit::TestCase
6
+ def _test_tokens
7
+ text = "I tell a story"
8
+
9
+ tokens = SpaCy.tokens(text)
10
+
11
+ assert_equal 4, tokens.length
12
+ assert_equal "tell", tokens[1].to_s
13
+ end
14
+
15
+ def test_segments
16
+ text = "I tell a story. It's a very good story."
17
+
18
+ corpus = Document::Corpus.setup({})
19
+
20
+ Document.setup(text, "TEST", "test_doc1", "simple_sentence")
21
+
22
+ corpus.add_document text
23
+ text.corpus = corpus
24
+
25
+ segments = SpaCy.segments(text)
26
+
27
+ segments.each do |segment|
28
+ assert_equal segment, segment.segid.tap{|e| e.corpus = corpus}.segment
29
+ end
30
+ end
31
+ end
32
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.9
4
+ version: 1.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-13 00:00:00.000000000 Z
11
+ date: 2020-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -78,6 +78,10 @@ files:
78
78
  - lib/rbbt/bow/bow.rb
79
79
  - lib/rbbt/bow/dictionary.rb
80
80
  - lib/rbbt/bow/misc.rb
81
+ - lib/rbbt/document.rb
82
+ - lib/rbbt/document/annotation.rb
83
+ - lib/rbbt/document/corpus.rb
84
+ - lib/rbbt/document/corpus/pubmed.rb
81
85
  - lib/rbbt/ner/NER.rb
82
86
  - lib/rbbt/ner/abner.rb
83
87
  - lib/rbbt/ner/banner.rb
@@ -98,18 +102,18 @@ files:
98
102
  - lib/rbbt/nlp/genia/sentence_splitter.rb
99
103
  - lib/rbbt/nlp/nlp.rb
100
104
  - lib/rbbt/nlp/open_nlp/sentence_splitter.rb
101
- - lib/rbbt/text/corpus.rb
102
- - lib/rbbt/text/corpus/document.rb
103
- - lib/rbbt/text/corpus/document_repo.rb
104
- - lib/rbbt/text/corpus/sources/pmid.rb
105
- - lib/rbbt/text/document.rb
106
- - lib/rbbt/text/segment.rb
107
- - lib/rbbt/text/segment/docid.rb
108
- - lib/rbbt/text/segment/named_entity.rb
109
- - lib/rbbt/text/segment/relationship.rb
110
- - lib/rbbt/text/segment/segmented.rb
111
- - lib/rbbt/text/segment/token.rb
112
- - lib/rbbt/text/segment/transformed.rb
105
+ - lib/rbbt/nlp/spaCy.rb
106
+ - lib/rbbt/segment.rb
107
+ - lib/rbbt/segment/annotation.rb
108
+ - lib/rbbt/segment/encoding.rb
109
+ - lib/rbbt/segment/named_entity.rb
110
+ - lib/rbbt/segment/overlaps.rb
111
+ - lib/rbbt/segment/range_index.rb
112
+ - lib/rbbt/segment/relationship.rb
113
+ - lib/rbbt/segment/segmented.rb
114
+ - lib/rbbt/segment/token.rb
115
+ - lib/rbbt/segment/transformed.rb
116
+ - lib/rbbt/segment/tsv.rb
113
117
  - share/install/software/ABNER
114
118
  - share/install/software/BANNER
115
119
  - share/install/software/ChemicalTagger
@@ -128,6 +132,9 @@ files:
128
132
  - test/rbbt/bow/test_bow.rb
129
133
  - test/rbbt/bow/test_dictionary.rb
130
134
  - test/rbbt/bow/test_misc.rb
135
+ - test/rbbt/document/corpus/test_pubmed.rb
136
+ - test/rbbt/document/test_annotation.rb
137
+ - test/rbbt/document/test_corpus.rb
131
138
  - test/rbbt/entity/test_document.rb
132
139
  - test/rbbt/ner/test_NER.rb
133
140
  - test/rbbt/ner/test_abner.rb
@@ -146,16 +153,17 @@ files:
146
153
  - test/rbbt/nlp/genia/test_sentence_splitter.rb
147
154
  - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
148
155
  - test/rbbt/nlp/test_nlp.rb
149
- - test/rbbt/text/corpus/sources/test_pmid.rb
150
- - test/rbbt/text/corpus/test_document.rb
151
- - test/rbbt/text/segment/test_named_entity.rb
152
- - test/rbbt/text/segment/test_relationship.rb
153
- - test/rbbt/text/segment/test_segmented.rb
154
- - test/rbbt/text/segment/test_transformed.rb
155
- - test/rbbt/text/test_corpus.rb
156
- - test/rbbt/text/test_document.rb
157
- - test/rbbt/text/test_segment.rb
156
+ - test/rbbt/segment/test_annotation.rb
157
+ - test/rbbt/segment/test_corpus.rb
158
+ - test/rbbt/segment/test_encoding.rb
159
+ - test/rbbt/segment/test_named_entity.rb
160
+ - test/rbbt/segment/test_overlaps.rb
161
+ - test/rbbt/segment/test_range_index.rb
162
+ - test/rbbt/segment/test_transformed.rb
163
+ - test/rbbt/test_document.rb
164
+ - test/rbbt/test_segment.rb
158
165
  - test/test_helper.rb
166
+ - test/test_spaCy.rb
159
167
  homepage: http://github.com/mikisvaz/rbbt-util
160
168
  licenses: []
161
169
  metadata: {}
@@ -182,18 +190,13 @@ test_files:
182
190
  - test/rbbt/nlp/test_nlp.rb
183
191
  - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
184
192
  - test/rbbt/nlp/genia/test_sentence_splitter.rb
185
- - test/rbbt/text/test_document.rb
186
- - test/rbbt/text/corpus/sources/test_pmid.rb
187
- - test/rbbt/text/corpus/test_document.rb
188
- - test/rbbt/text/test_segment.rb
189
- - test/rbbt/text/test_corpus.rb
190
- - test/rbbt/text/segment/test_transformed.rb
191
- - test/rbbt/text/segment/test_relationship.rb
192
- - test/rbbt/text/segment/test_named_entity.rb
193
- - test/rbbt/text/segment/test_segmented.rb
194
193
  - test/rbbt/bow/test_bow.rb
195
194
  - test/rbbt/bow/test_misc.rb
196
195
  - test/rbbt/bow/test_dictionary.rb
196
+ - test/rbbt/test_document.rb
197
+ - test/rbbt/document/test_annotation.rb
198
+ - test/rbbt/document/corpus/test_pubmed.rb
199
+ - test/rbbt/document/test_corpus.rb
197
200
  - test/rbbt/entity/test_document.rb
198
201
  - test/rbbt/ner/test_patterns.rb
199
202
  - test/rbbt/ner/test_NER.rb
@@ -209,4 +212,13 @@ test_files:
209
212
  - test/rbbt/ner/test_finder.rb
210
213
  - test/rbbt/ner/test_linnaeus.rb
211
214
  - test/rbbt/ner/test_oscar4.rb
215
+ - test/rbbt/test_segment.rb
216
+ - test/rbbt/segment/test_transformed.rb
217
+ - test/rbbt/segment/test_overlaps.rb
218
+ - test/rbbt/segment/test_annotation.rb
219
+ - test/rbbt/segment/test_named_entity.rb
220
+ - test/rbbt/segment/test_encoding.rb
221
+ - test/rbbt/segment/test_range_index.rb
222
+ - test/rbbt/segment/test_corpus.rb
223
+ - test/test_spaCy.rb
212
224
  - test/test_helper.rb
data/lib/rbbt/text/corpus.rb DELETED
@@ -1,106 +0,0 @@
1
- require 'rbbt/text/corpus/document'
2
- require 'rbbt/text/corpus/document_repo'
3
-
4
- class Corpus
5
- class << self
6
- attr_accessor :claims
7
- def claim(namespace, &block)
8
- @@claims = {}
9
- @@claims[namespace] = block
10
- end
11
-
12
- end
13
- attr_accessor :corpora_path, :document_repo, :persistence_dir, :global_annotations
14
-
15
- def initialize(corpora_path = nil)
16
- @corpora_path = case
17
- when corpora_path.nil?
18
- Rbbt.corpora
19
- when (not Path === corpora_path)
20
- Path.setup(corpora_path)
21
- else
22
- corpora_path
23
- end
24
-
25
- @corpora_path = @corpora_path.find
26
- @persistence_dir = File.join(@corpora_path, "annotations")
27
-
28
- Misc.lock(@persistence_dir) do
29
- @global_annotations = TSV.setup(Persist.open_tokyocabinet(File.join(@persistence_dir, "global_annotations"), false, :list), :key => "ID", :fields => ["Start", "End", "JSON", "Document ID", "Entity Type"])
30
- @global_annotations.unnamed = true
31
- @global_annotations.close
32
- end
33
-
34
- Misc.lock(@corpora_path.document_repo) do
35
- @document_repo = DocumentRepo.open_tokyocabinet @corpora_path.document_repo, false
36
- @document_repo.close
37
- end
38
-
39
- end
40
-
41
- def persistence_for(docid)
42
- File.join(persistence_dir, docid)
43
- end
44
-
45
-
46
- def docid(docid)
47
- begin
48
- if @document_repo.include?(docid)
49
- Document.new(persistence_for(docid), docid, @document_repo[docid], @global_annotations, self)
50
- else
51
- namespace, id, type = docid.split(":")
52
- if @@claims.include?(namespace)
53
-
54
- docid = self.instance_exec id, type, &(@@claims[namespace])
55
- docid = docid.first if Array === docid
56
- self.docid(docid)
57
- else
58
- raise "Document '#{ docid }' was not found." unless @document_repo.include? docid
59
- end
60
- end
61
- ensure
62
- @document_repo.close
63
- end
64
- end
65
-
66
- def document(namespace, id, type, hash)
67
- docid = [namespace, id, type, hash] * ":"
68
- self.docid(docid)
69
- end
70
-
71
- def add_document(text, namespace = nil, id = nil, type = nil)
72
- text = Misc.fixutf8(text)
73
- hash = Digest::MD5.hexdigest(text)
74
- @document_repo.add(text, namespace, id, type, hash)
75
- end
76
-
77
- def add_docid(text, docid)
78
- namespace, id, type, hash = docid.split(":")
79
- @document_repo.add(text, namespace, id, type, hash)
80
- end
81
-
82
-
83
- def find(namespace=nil, id = nil, type = nil, hash = nil)
84
- @document_repo.find(namespace, id, type, hash).collect{|docid|
85
- self.docid(docid)
86
- }
87
- end
88
-
89
- def find_docid(docid)
90
- @document_repo.find_docid(docid).collect{|docid|
91
- self.docid(docid)
92
- }
93
- end
94
-
95
- def exists?(namespace=nil, id = nil, type = nil, hash = nil)
96
- find(namespace, id, type, hash).any?
97
- end
98
-
99
- def [](docid)
100
- self.docid(docid)
101
- end
102
-
103
- def include?(id)
104
- @document_repo.include? id
105
- end
106
- end