zipf 1.0.3 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/zipf.rb +6 -7
- data/lib/zipf/SparseVector.rb +0 -1
- data/lib/zipf/bleu.rb +0 -2
- data/lib/zipf/dag.rb +0 -2
- data/lib/zipf/fileutil.rb +0 -1
- data/lib/zipf/{hg.rb → hypergraph.rb} +0 -3
- data/lib/zipf/misc.rb +0 -1
- data/lib/zipf/tfidf.rb +0 -1
- metadata +4 -5
- data/lib/zipf/grammar.rb +0 -123
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fdbe716da58c6e94e0f7066226378cc113799088
|
4
|
+
data.tar.gz: 9c7a4d0c26ad22a5df8e51a3b794b3bcbf348883
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 170b92d1d017ca7ac701e77723fd79b4a4a9c74414b5c6445da0929e89ff919c59006be4493b641f0a96c63b073b9c7258c65b4497f9071fbd96c209f2820ce8
|
7
|
+
data.tar.gz: 8267eeeb569ce8b9f92a2d1471825a39f8d786802caccdd9c66164e459b2648a950eb828135d0011b29d1e4e6744815ccbee48c7c71151335d04bd3f09edff8b
|
data/lib/zipf.rb
CHANGED
@@ -1,16 +1,15 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require 'zipf/stringutil'
|
3
|
+
require 'zipf/bleu'
|
4
|
+
require 'zipf/dag'
|
4
5
|
require 'zipf/fileutil'
|
6
|
+
require 'zipf/hypergraph'
|
7
|
+
require 'zipf/misc'
|
8
|
+
require 'zipf/semirings'
|
5
9
|
require 'zipf/SparseVector'
|
10
|
+
require 'zipf/stringutil'
|
6
11
|
require 'zipf/tfidf'
|
7
12
|
require 'zipf/Translation'
|
8
|
-
require 'zipf/dag'
|
9
|
-
require 'zipf/semirings'
|
10
|
-
require 'zipf/bleu'
|
11
|
-
require 'zipf/misc'
|
12
|
-
require 'zipf/hg'
|
13
|
-
require 'zipf/grammar'
|
14
13
|
|
15
14
|
STDIN.set_encoding 'utf-8'
|
16
15
|
STDOUT.set_encoding 'utf-8'
|
data/lib/zipf/SparseVector.rb
CHANGED
data/lib/zipf/bleu.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
module BLEU
|
2
2
|
|
3
|
-
|
4
3
|
class BLEU::NgramCounts
|
5
4
|
attr_accessor :sum, :clipped, :ref_len, :hyp_len, :n
|
6
5
|
|
@@ -125,6 +124,5 @@ def BLEU::per_sentence_bleu hypothesis, reference, n=4, smooth=0.0
|
|
125
124
|
return Math.exp logbleu
|
126
125
|
end
|
127
126
|
|
128
|
-
|
129
127
|
end #module
|
130
128
|
|
data/lib/zipf/dag.rb
CHANGED
@@ -2,7 +2,6 @@ module DAG
|
|
2
2
|
|
3
3
|
require 'json'
|
4
4
|
|
5
|
-
|
6
5
|
class DAG::Node
|
7
6
|
attr_accessor :label, :outgoing, :incoming, :score, :mark
|
8
7
|
|
@@ -200,6 +199,5 @@ def DAG::read_graph_from_json fn, semiring=RealSemiring.new
|
|
200
199
|
return graph, nodes_by_label
|
201
200
|
end
|
202
201
|
|
203
|
-
|
204
202
|
end #module
|
205
203
|
|
data/lib/zipf/fileutil.rb
CHANGED
data/lib/zipf/{hg.rb → hypergraph.rb}
CHANGED
@@ -3,10 +3,8 @@
|
|
3
3
|
require_relative 'semirings'
|
4
4
|
require 'json'
|
5
5
|
|
6
|
-
|
7
6
|
module HG
|
8
7
|
|
9
|
-
|
10
8
|
class HG::Node
|
11
9
|
attr_accessor :label, :cat, :outgoing, :incoming, :score
|
12
10
|
|
@@ -168,6 +166,5 @@ def HG::all_paths hypergraph, root, semiring=ViterbiSemiring.new
|
|
168
166
|
return paths
|
169
167
|
end
|
170
168
|
|
171
|
-
|
172
169
|
end #module
|
173
170
|
|
data/lib/zipf/misc.rb
CHANGED
data/lib/zipf/tfidf.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zipf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: '1.1'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Patrick Simianer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-09-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: NLP related tools and classes
|
14
14
|
email: p@simianer.de
|
@@ -19,12 +19,11 @@ files:
|
|
19
19
|
- lib/zipf.rb
|
20
20
|
- lib/zipf/stringutil.rb
|
21
21
|
- lib/zipf/misc.rb
|
22
|
-
- lib/zipf/grammar.rb
|
23
|
-
- lib/zipf/hg.rb
|
24
22
|
- lib/zipf/fileutil.rb
|
25
23
|
- lib/zipf/semirings.rb
|
26
24
|
- lib/zipf/dag.rb
|
27
25
|
- lib/zipf/SparseVector.rb
|
26
|
+
- lib/zipf/hypergraph.rb
|
28
27
|
- lib/zipf/tfidf.rb
|
29
28
|
- lib/zipf/bleu.rb
|
30
29
|
- lib/zipf/Translation.rb
|
@@ -48,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
48
47
|
version: '0'
|
49
48
|
requirements: []
|
50
49
|
rubyforge_project:
|
51
|
-
rubygems_version: 2.0.
|
50
|
+
rubygems_version: 2.0.14
|
52
51
|
signing_key:
|
53
52
|
specification_version: 4
|
54
53
|
summary: zipf
|
data/lib/zipf/grammar.rb
DELETED
@@ -1,123 +0,0 @@
|
|
1
|
-
module Grammar
|
2
|
-
|
3
|
-
|
4
|
-
class T
|
5
|
-
attr_accessor :word
|
6
|
-
|
7
|
-
def initialize word
|
8
|
-
@word = word
|
9
|
-
end
|
10
|
-
|
11
|
-
def to_s
|
12
|
-
"T<#{@word}>"
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
class NT
|
17
|
-
attr_accessor :symbol, :index, :span
|
18
|
-
|
19
|
-
def initialize symbol, index=0
|
20
|
-
@symbol = symbol
|
21
|
-
@index = index
|
22
|
-
@span = Span.new
|
23
|
-
end
|
24
|
-
|
25
|
-
def to_s
|
26
|
-
"NT(#{@span.left},#{@span.right})<#{@symbol},#{@index}>"
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
class Rule
|
31
|
-
attr_accessor :lhs, :rhs, :e
|
32
|
-
|
33
|
-
def initialize lhs=nil, rhs=[], e=''
|
34
|
-
@lhs = lhs
|
35
|
-
@rhs = rhs
|
36
|
-
@e = e
|
37
|
-
end
|
38
|
-
|
39
|
-
def to_s
|
40
|
-
"#{lhs} -> #{rhs.map{ |i| i.to_s }.join ' '} [arity=#{arity}] ||| #{@e}"
|
41
|
-
end
|
42
|
-
|
43
|
-
def arity
|
44
|
-
rhs.select { |i| i.class == NT }.size
|
45
|
-
end
|
46
|
-
|
47
|
-
def from_s s
|
48
|
-
_ = splitpipe s, 3
|
49
|
-
@lhs = NT.new _[0].strip.gsub!(/(\[|\])/, "")
|
50
|
-
_[1].split.each { |x|
|
51
|
-
x.strip!
|
52
|
-
if x[0]=='[' && x[x.size-1] == ']'
|
53
|
-
@rhs << NT.new(x.gsub!(/(\[|\])/, "").split(',')[0])
|
54
|
-
else
|
55
|
-
@rhs << T.new(x)
|
56
|
-
end
|
57
|
-
}
|
58
|
-
@e = _[2]
|
59
|
-
end
|
60
|
-
|
61
|
-
def self.from_s s
|
62
|
-
r = self.new
|
63
|
-
r.from_s s
|
64
|
-
return r
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
class Span
|
69
|
-
attr_accessor :left, :right
|
70
|
-
|
71
|
-
def initialize left=nil, right=nil
|
72
|
-
@left = left
|
73
|
-
@right = right
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
class Grammar
|
78
|
-
attr_accessor :rules, :startn, :startt, :flat
|
79
|
-
|
80
|
-
def initialize fn
|
81
|
-
@rules = []; @startn = []; @startt = [] ;@flat = []
|
82
|
-
ReadFile.readlines_strip(fn).each_with_index { |s,i|
|
83
|
-
STDERR.write '.'; STDERR.write " #{i+1}\n" if (i+1)%80==0
|
84
|
-
@rules << Rule.from_s(s)
|
85
|
-
if @rules.last.rhs.first.class == NT
|
86
|
-
@startn << @rules.last
|
87
|
-
else
|
88
|
-
if rules.last.arity == 0
|
89
|
-
@flat << @rules.last
|
90
|
-
else
|
91
|
-
@startt << @rules.last
|
92
|
-
end
|
93
|
-
end
|
94
|
-
}
|
95
|
-
STDERR.write "\n"
|
96
|
-
end
|
97
|
-
|
98
|
-
def to_s
|
99
|
-
s = ''
|
100
|
-
@rules.each { |r| s += r.to_s+"\n" }
|
101
|
-
return s
|
102
|
-
end
|
103
|
-
|
104
|
-
def add_glue_rules
|
105
|
-
@rules.map { |r| r.lhs.symbol }.select { |s| s != 'S' }.uniq.each { |symbol|
|
106
|
-
@rules << Rule.new(NT.new('S'), [NT.new(symbol)])
|
107
|
-
@startn << @rules.last
|
108
|
-
@rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')])
|
109
|
-
@startn << @rules.last
|
110
|
-
}
|
111
|
-
end
|
112
|
-
|
113
|
-
def add_pass_through_rules s
|
114
|
-
s.each { |word|
|
115
|
-
@rules << Rule.new(NT.new('X'), [T.new(word)])
|
116
|
-
@flat << @rules.last
|
117
|
-
}
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
|
122
|
-
end #module
|
123
|
-
|