segment_ruby 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/data/segment_ruby/test_bigram/2_frequencies.tsv +10 -0
- data/data/segment_ruby/test_bigram/2_total.tsv +1 -0
- data/data/segment_ruby/test_bigram/frequencies.tsv +10 -0
- data/data/segment_ruby/test_bigram/total.tsv +1 -0
- data/data/segment_ruby/test_unigram/frequencies.tsv +10 -0
- data/data/segment_ruby/test_unigram/total.tsv +1 -0
- data/data/segment_ruby/us_names/2_frequencies.tsv.save +0 -0
- data/data/segment_ruby/us_names/2_total.tsv.save +1 -0
- data/data/segment_ruby/us_names/README.md +15 -0
- data/data/segment_ruby/us_names/frequencies.tsv +78637 -0
- data/data/segment_ruby/us_names/total.tsv +1 -0
- data/lib/segment_ruby/version.rb +1 -1
- data/lib/segment_ruby.rb +3 -3
- metadata +13 -2
@@ -0,0 +1 @@
|
|
1
|
+
175469698
|
data/lib/segment_ruby/version.rb
CHANGED
data/lib/segment_ruby.rb
CHANGED
@@ -68,7 +68,7 @@ module SegmentRuby
|
|
68
68
|
# bigram log probabilities
|
69
69
|
btf = total_file_name('2_')
|
70
70
|
bff = freq_file_name('2_')
|
71
|
-
@blp = (File.exists?(btf)
|
71
|
+
@blp = (File.exists?(btf) && File.exists?(bff) ? ProbabilityDistribution.new(btf, bff) : false)
|
72
72
|
end
|
73
73
|
|
74
74
|
attr_reader :blp, :max_word_length, :model_name, :ulp
|
@@ -80,7 +80,7 @@ module SegmentRuby
|
|
80
80
|
def log_CPr(w, prev)
|
81
81
|
key = [prev, w].join(' ')
|
82
82
|
|
83
|
-
blp
|
83
|
+
blp && blp.has_key?(key) ? blp.log_prob(key) : ulp.log_prob(w)
|
84
84
|
end
|
85
85
|
|
86
86
|
def total_file_name(prefix='')
|
@@ -107,7 +107,7 @@ module SegmentRuby
|
|
107
107
|
end
|
108
108
|
|
109
109
|
def segment_r(text, prev, n, memo)
|
110
|
-
return [0.0, []] if
|
110
|
+
return [0.0, []] if (!text) || (text.size == 0)
|
111
111
|
return memo[text] if memo.has_key?(text)
|
112
112
|
|
113
113
|
log_p_segment = splits(text).map do |first, rem|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: segment_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Will Fitzgerald
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -100,11 +100,22 @@ files:
|
|
100
100
|
- data/segment_ruby/small/README.md
|
101
101
|
- data/segment_ruby/small/frequencies.tsv
|
102
102
|
- data/segment_ruby/small/total.tsv
|
103
|
+
- data/segment_ruby/test_bigram/2_frequencies.tsv
|
104
|
+
- data/segment_ruby/test_bigram/2_total.tsv
|
105
|
+
- data/segment_ruby/test_bigram/frequencies.tsv
|
106
|
+
- data/segment_ruby/test_bigram/total.tsv
|
107
|
+
- data/segment_ruby/test_unigram/frequencies.tsv
|
108
|
+
- data/segment_ruby/test_unigram/total.tsv
|
103
109
|
- data/segment_ruby/twitter/2_frequencies.tsv
|
104
110
|
- data/segment_ruby/twitter/2_total.tsv
|
105
111
|
- data/segment_ruby/twitter/README.md
|
106
112
|
- data/segment_ruby/twitter/frequencies.tsv
|
107
113
|
- data/segment_ruby/twitter/total.tsv
|
114
|
+
- data/segment_ruby/us_names/2_frequencies.tsv.save
|
115
|
+
- data/segment_ruby/us_names/2_total.tsv.save
|
116
|
+
- data/segment_ruby/us_names/README.md
|
117
|
+
- data/segment_ruby/us_names/frequencies.tsv
|
118
|
+
- data/segment_ruby/us_names/total.tsv
|
108
119
|
- lib/segment_ruby.rb
|
109
120
|
- lib/segment_ruby/version.rb
|
110
121
|
- segment_ruby.gemspec
|