wordtree 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,50 +13,32 @@ describe WordTree::Book do
13
13
  end
14
14
 
15
15
  it "can return cleaned content" do
16
- book = WordTree::Book.create("book", {}, "Wi&ld\nContent!")
17
- expect(book.content_clean).to eq("wild content\n")
16
+ content = "Wi&ld\nContent!"
17
+ book = WordTree::Book.create("book", {}, content)
18
+ expect(content).to eq("Wi&ld\nContent!")
19
+ expect(book.content_clean).to eq("wild content.")
18
20
  end
19
21
 
20
- context "ngrams" do
21
- let(:content) { "A man. A plan. And a man."}
22
- let(:book) { WordTree::Book.create("book", {}, content) }
23
- let(:one_grams) { { "a" => 3, "man" => 2, "plan" => 1, "and" => 1, "." => 3 } }
24
- let(:two_grams) {
25
- {"a man" => 2, "man ." => 2, ". a" => 1, "a plan" => 1,
26
- "plan ." => 1, ". and" => 1, "and a" => 1}
27
- }
28
- describe "#count_ngrams" do
29
- it "creates a hash lookup table" do
30
- hash = book.count_ngrams(1)
31
- expect(hash).to be_a(Hash)
32
- end
33
-
34
- it "has counts of ngrams" do
35
- hash = book.count_ngrams(1)
36
- expect(hash).to eq(one_grams)
37
- hash = book.count_ngrams(2)
38
- expect(hash).to eq(two_grams)
39
- end
40
-
41
- it "memoizes ngrams" do
42
- expect(book).to receive(:count_ngrams).with(1).and_return(one_grams)
43
- expect(book.ngrams(1)).to eq one_grams
44
- expect(book).to_not receive(:count_ngrams)
45
- expect(book.ngrams(1)).to eq one_grams
46
- end
47
- end
48
-
49
- describe "#set_ngrams" do
50
- it "sets the lookup hash" do
51
- book.set_ngrams(1, {"one" => 1})
52
- expect(book.ngrams(1)).to eq("one" => 1)
53
- expect(book.ngrams(2)).to eq(two_grams)
54
- end
55
-
56
- it "raises an error when not a hash" do
57
- expect{ book.set_ngrams(1, "string") }.to raise_error
58
- expect{ book.set_ngrams(1, nil) }.to raise_error
59
- end
60
- end
61
- end
22
+ # context "ngrams" do
23
+ # let(:content) { "A man. A plan. And a man."}
24
+ # let(:book) { WordTree::Book.create("book", {}, content) }
25
+ # let(:one_grams) { { "a" => 3, "man" => 2, "plan" => 1, "and" => 1, "." => 3 } }
26
+ # let(:two_grams) {
27
+ # {"a man" => 2, "man ." => 2, ". a" => 1, "a plan" => 1,
28
+ # "plan ." => 1, ". and" => 1, "and a" => 1}
29
+ # }
30
+ # describe "#ngrams" do
31
+ # it "creates a hash lookup table" do
32
+ # hash = book.count_ngrams(1)
33
+ # expect(hash).to be_a(Hash)
34
+ # end
35
+
36
+ # it "has counts of ngrams" do
37
+ # hash = book.count_ngrams(1)
38
+ # expect(hash).to eq(one_grams)
39
+ # hash = book.count_ngrams(2)
40
+ # expect(hash).to eq(two_grams)
41
+ # end
42
+ # end
43
+ # end
62
44
  end
@@ -32,12 +32,6 @@ describe WordTree::Disk::Librarian do
32
32
  expect(book.year).to eq(1800)
33
33
  expect(book.content).to eq("Book with content")
34
34
  end
35
-
36
- it "loads ngrams if available" do
37
- book = librarian.find("book")
38
- expect(book).to_not receive(:count_ngrams)
39
- expect(book.ngrams(1)).to eq("xyz" => 1)
40
- end
41
35
  end
42
36
 
43
37
  describe "#each" do
@@ -46,43 +40,5 @@ describe WordTree::Disk::Librarian do
46
40
  expect(book_sizes).to contain_exactly(17, 23)
47
41
  end
48
42
  end
49
-
50
- it "saves ngrams to disk" do
51
- tmp_root = Dir.mktmpdir
52
- tmp_library = WordTree::Disk::Library.new(tmp_root)
53
- tmp_librarian = WordTree::Disk::Librarian.new(tmp_library)
54
-
55
- book = librarian.find("book")
56
- book.ngrams(1)
57
- book.ngrams(2)
58
-
59
- tmp_librarian.save(book)
60
-
61
- ngrams_filepath = tmp_library.path_to("book", :ngrams, :n => 1)
62
- expect(File.exist?(ngrams_filepath)).to be_truthy
63
- end
64
-
65
- it "saves to disk (yaml, content)" do
66
- tmp_root = Dir.mktmpdir
67
- tmp_library = WordTree::Disk::Library.new(tmp_root)
68
- tmp_librarian = WordTree::Disk::Librarian.new(tmp_library)
69
-
70
- book = librarian.find_without_ngrams("book")
71
-
72
- book.source = "test"
73
- book.content += "."
74
-
75
- tmp_librarian.save(book)
76
-
77
- updated = Preamble.load(tmp_library.path_to("book"))
78
- expect(updated.metadata).to eq(
79
- :id => "book",
80
- :archive_org_id => "book",
81
- :year => 1800,
82
- :source => "test",
83
- :size_bytes => 17)
84
- expect(updated.content).to eq("Book with content.")
85
- end
86
-
87
43
  end
88
44
  end
@@ -0,0 +1,81 @@
1
+ require 'spec_helper'
2
+ require 'wordtree/text'
3
+ require 'timeout'
4
+
5
+ describe WordTree::Text do
6
+ context "#split_near" do
7
+ it "splits on spaces" do
8
+ line, rem = WordTree::Text.split_near("it is near", 7)
9
+ expect(line).to eq("it is")
10
+ expect(rem).to eq("near")
11
+ end
12
+
13
+ it "removes a space if index lands on one" do
14
+ line, rem = WordTree::Text.split_near("it is near", 5)
15
+ expect(line).to eq("it is")
16
+ expect(rem).to eq("near")
17
+ end
18
+
19
+ it "keeps the whole line if index is >= length of line" do
20
+ line, rem = WordTree::Text.split_near("it is near", 10)
21
+ expect(line).to eq("it is near")
22
+ expect(rem).to eq("")
23
+
24
+ line, rem = WordTree::Text.split_near("it is near", 11)
25
+ expect(line).to eq("it is near")
26
+ expect(rem).to eq("")
27
+ end
28
+
29
+ it "splits at the index anyway if no spaces are found" do
30
+ line, rem = WordTree::Text.split_near("itisnear", 4)
31
+ expect(line).to eq("itis")
32
+ expect(rem).to eq("near")
33
+ end
34
+ end
35
+
36
+ describe "#clean" do
37
+ it "wraps" do
38
+ sample_text = "This, [here] is awesome, right"
39
+ cleaned = WordTree::Text.clean(sample_text)
40
+ expect(cleaned).to eq("this here is awesome right")
41
+ end
42
+
43
+ it "joins lines ending in -" do
44
+ sample_text = "What-\never\ndo you\n mean?"
45
+ cleaned = WordTree::Text.clean(sample_text)
46
+ expect(cleaned).to eq("whatever do you mean.")
47
+ end
48
+
49
+ it "does not ignore sentence boundaries" do
50
+ sample_text = "This is a sentence. And so is this? Keep the dots."
51
+ cleaned = WordTree::Text.clean(sample_text)
52
+ expect(cleaned).to eq("this is a sentence.and so is this.keep the dots.")
53
+ end
54
+
55
+ it "compresses sentence boundary punctuation and spaces" do
56
+ sample_text = "words . . and.. stuff"
57
+ cleaned = WordTree::Text.clean(sample_text)
58
+ expect(cleaned).to eq("words.and.stuff")
59
+ end
60
+ end
61
+
62
+ describe "#common_trigrams" do
63
+ it "returns 0 for strings of len < 3" do
64
+ expect(WordTree::Text.common_trigrams("")).to eq 0
65
+ expect(WordTree::Text.common_trigrams("1")).to eq 0
66
+ expect(WordTree::Text.common_trigrams("12")).to eq 0
67
+ end
68
+
69
+ it "returns 0 for strings without common trigrams" do
70
+ expect(WordTree::Text.common_trigrams("!{*@*!()}")).to eq 0
71
+ expect(WordTree::Text.common_trigrams("qwrtypzzx")).to eq 0
72
+ expect(WordTree::Text.common_trigrams(" ")).to eq 0
73
+ end
74
+
75
+ it "returns correct counts for strings with trigrams" do
76
+ expect(WordTree::Text.common_trigrams("what")).to eq 1
77
+ expect(WordTree::Text.common_trigrams("the wall")).to eq 2
78
+ end
79
+ end
80
+
81
+ end
data/wordtree.gemspec CHANGED
@@ -5,12 +5,12 @@ require 'wordtree/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "wordtree"
8
- spec.version = Wordtree::VERSION
8
+ spec.version = WordTree::VERSION
9
9
  spec.authors = ["Duane Johnson"]
10
10
  spec.email = ["duane.johnson@gmail.com"]
11
11
  spec.description = %q{WordTree common library code}
12
- spec.summary = %q{Wordtree common library code}
13
- spec.homepage = ""
12
+ spec.summary = %q{WordTree common library code}
13
+ spec.homepage = "https://github.com/wordtreefoundation/wordtree-ruby"
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -18,19 +18,21 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_dependency "virtus"
22
- spec.add_dependency "preamble", ">= 0.0.3"
23
- spec.add_dependency "archivist-client", ">= 0.1.7"
24
- spec.add_dependency "retriable"
21
+ spec.extensions = %w[ext/extconf.rb]
22
+
23
+ spec.add_dependency "virtus", "~> 1.0"
24
+ spec.add_dependency "preamble", "0.0.3"
25
+ spec.add_dependency "archivist-client", "0.1.7"
26
+ spec.add_dependency "retriable", "1.4.1"
25
27
  spec.add_dependency "simhash", "0.2.5"
26
- spec.add_dependency "rethinkdb", "~> 1.14.0"
28
+ spec.add_dependency "rethinkdb", "~> 1.14"
27
29
 
28
30
  spec.add_development_dependency "bundler", "~> 1.3"
29
- spec.add_development_dependency "rake"
30
- spec.add_development_dependency "debugger"
31
- spec.add_development_dependency "rspec"
32
- spec.add_development_dependency "guard"
33
- spec.add_development_dependency "guard-rspec"
34
- spec.add_development_dependency "vcr"
35
- spec.add_development_dependency "webmock"
31
+ spec.add_development_dependency "rake", "~> 10.3"
32
+ spec.add_development_dependency "byebug", "~> 3.4"
33
+ spec.add_development_dependency "rspec", "~> 3.1"
34
+ spec.add_development_dependency "guard", "~> 2.6"
35
+ spec.add_development_dependency "guard-rspec", "~> 4.3"
36
+ spec.add_development_dependency "vcr", "~> 2.9"
37
+ spec.add_development_dependency "webmock", "~> 1.18"
36
38
  end
metadata CHANGED
@@ -1,84 +1,74 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wordtree
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
5
- prerelease:
4
+ version: 0.4.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Duane Johnson
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-09-08 00:00:00.000000000 Z
11
+ date: 2014-09-26 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: virtus
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
- version: '0'
19
+ version: '1.0'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
- version: '0'
26
+ version: '1.0'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: preamble
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ! '>='
31
+ - - '='
36
32
  - !ruby/object:Gem::Version
37
33
  version: 0.0.3
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ! '>='
38
+ - - '='
44
39
  - !ruby/object:Gem::Version
45
40
  version: 0.0.3
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: archivist-client
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ! '>='
45
+ - - '='
52
46
  - !ruby/object:Gem::Version
53
47
  version: 0.1.7
54
48
  type: :runtime
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ! '>='
52
+ - - '='
60
53
  - !ruby/object:Gem::Version
61
54
  version: 0.1.7
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: retriable
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ! '>='
59
+ - - '='
68
60
  - !ruby/object:Gem::Version
69
- version: '0'
61
+ version: 1.4.1
70
62
  type: :runtime
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ! '>='
66
+ - - '='
76
67
  - !ruby/object:Gem::Version
77
- version: '0'
68
+ version: 1.4.1
78
69
  - !ruby/object:Gem::Dependency
79
70
  name: simhash
80
71
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
72
  requirements:
83
73
  - - '='
84
74
  - !ruby/object:Gem::Version
@@ -86,7 +76,6 @@ dependencies:
86
76
  type: :runtime
87
77
  prerelease: false
88
78
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
79
  requirements:
91
80
  - - '='
92
81
  - !ruby/object:Gem::Version
@@ -94,169 +83,157 @@ dependencies:
94
83
  - !ruby/object:Gem::Dependency
95
84
  name: rethinkdb
96
85
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
86
  requirements:
99
- - - ~>
87
+ - - "~>"
100
88
  - !ruby/object:Gem::Version
101
- version: 1.14.0
89
+ version: '1.14'
102
90
  type: :runtime
103
91
  prerelease: false
104
92
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
93
  requirements:
107
- - - ~>
94
+ - - "~>"
108
95
  - !ruby/object:Gem::Version
109
- version: 1.14.0
96
+ version: '1.14'
110
97
  - !ruby/object:Gem::Dependency
111
98
  name: bundler
112
99
  requirement: !ruby/object:Gem::Requirement
113
- none: false
114
100
  requirements:
115
- - - ~>
101
+ - - "~>"
116
102
  - !ruby/object:Gem::Version
117
103
  version: '1.3'
118
104
  type: :development
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
- none: false
122
107
  requirements:
123
- - - ~>
108
+ - - "~>"
124
109
  - !ruby/object:Gem::Version
125
110
  version: '1.3'
126
111
  - !ruby/object:Gem::Dependency
127
112
  name: rake
128
113
  requirement: !ruby/object:Gem::Requirement
129
- none: false
130
114
  requirements:
131
- - - ! '>='
115
+ - - "~>"
132
116
  - !ruby/object:Gem::Version
133
- version: '0'
117
+ version: '10.3'
134
118
  type: :development
135
119
  prerelease: false
136
120
  version_requirements: !ruby/object:Gem::Requirement
137
- none: false
138
121
  requirements:
139
- - - ! '>='
122
+ - - "~>"
140
123
  - !ruby/object:Gem::Version
141
- version: '0'
124
+ version: '10.3'
142
125
  - !ruby/object:Gem::Dependency
143
- name: debugger
126
+ name: byebug
144
127
  requirement: !ruby/object:Gem::Requirement
145
- none: false
146
128
  requirements:
147
- - - ! '>='
129
+ - - "~>"
148
130
  - !ruby/object:Gem::Version
149
- version: '0'
131
+ version: '3.4'
150
132
  type: :development
151
133
  prerelease: false
152
134
  version_requirements: !ruby/object:Gem::Requirement
153
- none: false
154
135
  requirements:
155
- - - ! '>='
136
+ - - "~>"
156
137
  - !ruby/object:Gem::Version
157
- version: '0'
138
+ version: '3.4'
158
139
  - !ruby/object:Gem::Dependency
159
140
  name: rspec
160
141
  requirement: !ruby/object:Gem::Requirement
161
- none: false
162
142
  requirements:
163
- - - ! '>='
143
+ - - "~>"
164
144
  - !ruby/object:Gem::Version
165
- version: '0'
145
+ version: '3.1'
166
146
  type: :development
167
147
  prerelease: false
168
148
  version_requirements: !ruby/object:Gem::Requirement
169
- none: false
170
149
  requirements:
171
- - - ! '>='
150
+ - - "~>"
172
151
  - !ruby/object:Gem::Version
173
- version: '0'
152
+ version: '3.1'
174
153
  - !ruby/object:Gem::Dependency
175
154
  name: guard
176
155
  requirement: !ruby/object:Gem::Requirement
177
- none: false
178
156
  requirements:
179
- - - ! '>='
157
+ - - "~>"
180
158
  - !ruby/object:Gem::Version
181
- version: '0'
159
+ version: '2.6'
182
160
  type: :development
183
161
  prerelease: false
184
162
  version_requirements: !ruby/object:Gem::Requirement
185
- none: false
186
163
  requirements:
187
- - - ! '>='
164
+ - - "~>"
188
165
  - !ruby/object:Gem::Version
189
- version: '0'
166
+ version: '2.6'
190
167
  - !ruby/object:Gem::Dependency
191
168
  name: guard-rspec
192
169
  requirement: !ruby/object:Gem::Requirement
193
- none: false
194
170
  requirements:
195
- - - ! '>='
171
+ - - "~>"
196
172
  - !ruby/object:Gem::Version
197
- version: '0'
173
+ version: '4.3'
198
174
  type: :development
199
175
  prerelease: false
200
176
  version_requirements: !ruby/object:Gem::Requirement
201
- none: false
202
177
  requirements:
203
- - - ! '>='
178
+ - - "~>"
204
179
  - !ruby/object:Gem::Version
205
- version: '0'
180
+ version: '4.3'
206
181
  - !ruby/object:Gem::Dependency
207
182
  name: vcr
208
183
  requirement: !ruby/object:Gem::Requirement
209
- none: false
210
184
  requirements:
211
- - - ! '>='
185
+ - - "~>"
212
186
  - !ruby/object:Gem::Version
213
- version: '0'
187
+ version: '2.9'
214
188
  type: :development
215
189
  prerelease: false
216
190
  version_requirements: !ruby/object:Gem::Requirement
217
- none: false
218
191
  requirements:
219
- - - ! '>='
192
+ - - "~>"
220
193
  - !ruby/object:Gem::Version
221
- version: '0'
194
+ version: '2.9'
222
195
  - !ruby/object:Gem::Dependency
223
196
  name: webmock
224
197
  requirement: !ruby/object:Gem::Requirement
225
- none: false
226
198
  requirements:
227
- - - ! '>='
199
+ - - "~>"
228
200
  - !ruby/object:Gem::Version
229
- version: '0'
201
+ version: '1.18'
230
202
  type: :development
231
203
  prerelease: false
232
204
  version_requirements: !ruby/object:Gem::Requirement
233
- none: false
234
205
  requirements:
235
- - - ! '>='
206
+ - - "~>"
236
207
  - !ruby/object:Gem::Version
237
- version: '0'
208
+ version: '1.18'
238
209
  description: WordTree common library code
239
210
  email:
240
211
  - duane.johnson@gmail.com
241
212
  executables: []
242
- extensions: []
213
+ extensions:
214
+ - ext/extconf.rb
243
215
  extra_rdoc_files: []
244
216
  files:
245
- - .gitignore
246
- - .rspec
217
+ - ".gitignore"
218
+ - ".rspec"
247
219
  - Gemfile
248
220
  - Guardfile
249
221
  - LICENSE.txt
250
222
  - README.md
251
223
  - Rakefile
224
+ - ext/Makefile
225
+ - ext/extconf.rb
226
+ - ext/wordtree.cc
252
227
  - lib/wordtree.rb
253
228
  - lib/wordtree/archdown.rb
254
229
  - lib/wordtree/book.rb
230
+ - lib/wordtree/book_list.rb
255
231
  - lib/wordtree/db/librarian.rb
256
232
  - lib/wordtree/disk/librarian.rb
257
233
  - lib/wordtree/disk/library.rb
258
234
  - lib/wordtree/disk/library_locator.rb
259
- - lib/wordtree/text_utils.rb
235
+ - lib/wordtree/ngrams.rb
236
+ - lib/wordtree/text.rb
260
237
  - lib/wordtree/version.rb
261
238
  - spec/fixtures/cassettes/archive_org_download_book.yml
262
239
  - spec/fixtures/library/bo/ok/book/book.1grams.json
@@ -268,33 +245,32 @@ files:
268
245
  - spec/wordtree/disk/librarian_spec.rb
269
246
  - spec/wordtree/disk/library_locator_spec.rb
270
247
  - spec/wordtree/disk/library_spec.rb
271
- - spec/wordtree/text_utils_spec.rb
248
+ - spec/wordtree/text_spec.rb
272
249
  - wordtree.gemspec
273
- homepage: ''
250
+ homepage: https://github.com/wordtreefoundation/wordtree-ruby
274
251
  licenses:
275
252
  - MIT
253
+ metadata: {}
276
254
  post_install_message:
277
255
  rdoc_options: []
278
256
  require_paths:
279
257
  - lib
280
258
  required_ruby_version: !ruby/object:Gem::Requirement
281
- none: false
282
259
  requirements:
283
- - - ! '>='
260
+ - - ">="
284
261
  - !ruby/object:Gem::Version
285
262
  version: '0'
286
263
  required_rubygems_version: !ruby/object:Gem::Requirement
287
- none: false
288
264
  requirements:
289
- - - ! '>='
265
+ - - ">="
290
266
  - !ruby/object:Gem::Version
291
267
  version: '0'
292
268
  requirements: []
293
269
  rubyforge_project:
294
- rubygems_version: 1.8.23
270
+ rubygems_version: 2.2.2
295
271
  signing_key:
296
- specification_version: 3
297
- summary: Wordtree common library code
272
+ specification_version: 4
273
+ summary: WordTree common library code
298
274
  test_files:
299
275
  - spec/fixtures/cassettes/archive_org_download_book.yml
300
276
  - spec/fixtures/library/bo/ok/book/book.1grams.json
@@ -306,5 +282,4 @@ test_files:
306
282
  - spec/wordtree/disk/librarian_spec.rb
307
283
  - spec/wordtree/disk/library_locator_spec.rb
308
284
  - spec/wordtree/disk/library_spec.rb
309
- - spec/wordtree/text_utils_spec.rb
310
- has_rdoc:
285
+ - spec/wordtree/text_spec.rb