crfpp 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/crfpp/native.cpp +5 -1
- data/lib/crfpp.rb +3 -1
- data/lib/crfpp/data.rb +63 -0
- data/lib/crfpp/filelike.rb +6 -3
- data/lib/crfpp/model.rb +10 -3
- data/lib/crfpp/template.rb +3 -0
- data/lib/crfpp/token.rb +31 -0
- data/lib/crfpp/utilities.rb +7 -18
- data/lib/crfpp/version.rb +1 -1
- data/test/crfpp/test_data.rb +23 -0
- metadata +11 -7
data/ext/crfpp/native.cpp
CHANGED
@@ -24,11 +24,15 @@ static VALUE learn(VALUE klass, VALUE arguments) {
|
|
24
24
|
}
|
25
25
|
|
26
26
|
// start training
|
27
|
-
result = crfpp_learn(argc, argv)
|
27
|
+
result = crfpp_learn(argc, argv);
|
28
28
|
|
29
29
|
// free allocated memory
|
30
30
|
free(tmp);
|
31
31
|
|
32
|
+
if (result < 0) {
|
33
|
+
rb_raise(rb_eStandardError, "crfpp_learn failed");
|
34
|
+
}
|
35
|
+
|
32
36
|
return result;
|
33
37
|
}
|
34
38
|
|
data/lib/crfpp.rb
CHANGED
@@ -5,9 +5,11 @@ require 'tempfile'
|
|
5
5
|
require 'crfpp/version'
|
6
6
|
require 'crfpp/errors'
|
7
7
|
require 'crfpp/filelike'
|
8
|
+
require 'crfpp/token'
|
9
|
+
require 'crfpp/data'
|
8
10
|
require 'crfpp/macro'
|
9
|
-
require 'crfpp/template'
|
10
11
|
require 'crfpp/feature'
|
12
|
+
require 'crfpp/template'
|
11
13
|
require 'crfpp/model'
|
12
14
|
require 'crfpp/native'
|
13
15
|
require 'crfpp/utilities'
|
data/lib/crfpp/data.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
module CRFPP
|
2
|
+
|
3
|
+
#
|
4
|
+
# A Data object represents test or training data.
|
5
|
+
#
|
6
|
+
class Data
|
7
|
+
|
8
|
+
extend Forwardable
|
9
|
+
|
10
|
+
include Enumerable
|
11
|
+
include Filelike
|
12
|
+
|
13
|
+
attr_reader :sentences
|
14
|
+
|
15
|
+
def_delegators :@sentences, :length, :[], :each
|
16
|
+
|
17
|
+
def initialize(path = nil)
|
18
|
+
@path = path
|
19
|
+
open
|
20
|
+
end
|
21
|
+
|
22
|
+
def open
|
23
|
+
clear
|
24
|
+
|
25
|
+
read.lines.each do |line|
|
26
|
+
line.chomp!
|
27
|
+
if line.strip.empty?
|
28
|
+
new_sentence
|
29
|
+
else
|
30
|
+
push Token.parse(line)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
self
|
35
|
+
end
|
36
|
+
|
37
|
+
def clear
|
38
|
+
@sentences = [[]]
|
39
|
+
self
|
40
|
+
end
|
41
|
+
|
42
|
+
def to_s
|
43
|
+
empty? ? '' : zip([]).flatten.join("\n")
|
44
|
+
end
|
45
|
+
|
46
|
+
def push(feature)
|
47
|
+
@sentences.last << feature
|
48
|
+
self
|
49
|
+
end
|
50
|
+
|
51
|
+
alias << push
|
52
|
+
|
53
|
+
def empty?
|
54
|
+
[@sentences].flatten(2).compact.empty?
|
55
|
+
end
|
56
|
+
|
57
|
+
def new_sentence
|
58
|
+
@sentences << []
|
59
|
+
self
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
data/lib/crfpp/filelike.rb
CHANGED
@@ -8,13 +8,16 @@ module CRFPP
|
|
8
8
|
@path ||= Tempfile.new('filelike').path
|
9
9
|
end
|
10
10
|
|
11
|
-
def write
|
12
|
-
File.open(
|
13
|
-
f.write(
|
11
|
+
def write(file = path, content = to_s)
|
12
|
+
File.open(file, 'w:UTF-8') do |f|
|
13
|
+
f.write(content)
|
14
14
|
f.close
|
15
15
|
end
|
16
|
+
self
|
16
17
|
end
|
17
18
|
|
19
|
+
alias save write
|
20
|
+
|
18
21
|
def read
|
19
22
|
f = File.open(path, 'r:UTF-8')
|
20
23
|
f.read
|
data/lib/crfpp/model.rb
CHANGED
data/lib/crfpp/template.rb
CHANGED
@@ -32,6 +32,7 @@ module CRFPP
|
|
32
32
|
|
33
33
|
def clear
|
34
34
|
@sentences = [[]]
|
35
|
+
self
|
35
36
|
end
|
36
37
|
|
37
38
|
def to_s
|
@@ -43,6 +44,7 @@ module CRFPP
|
|
43
44
|
|
44
45
|
def push(feature)
|
45
46
|
@sentences.last << feature
|
47
|
+
self
|
46
48
|
end
|
47
49
|
|
48
50
|
alias << push
|
@@ -53,6 +55,7 @@ module CRFPP
|
|
53
55
|
|
54
56
|
def new_sentence
|
55
57
|
@sentences << []
|
58
|
+
self
|
56
59
|
end
|
57
60
|
|
58
61
|
end
|
data/lib/crfpp/token.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
module CRFPP
|
2
|
+
class Token
|
3
|
+
|
4
|
+
attr_accessor :word, :tags
|
5
|
+
|
6
|
+
def self.parse(string)
|
7
|
+
tokens = string.split(/\s+/).compact
|
8
|
+
tokens.empty? ? nil : new(tokens)
|
9
|
+
end
|
10
|
+
|
11
|
+
# Creates a new Token instance.
|
12
|
+
#
|
13
|
+
# call-seq:
|
14
|
+
# Token.new(word)
|
15
|
+
# Token.new(word, tags)
|
16
|
+
# Token.new(word, tag, tag, tag ...)
|
17
|
+
#
|
18
|
+
def initialize(*arguments)
|
19
|
+
@word, *@tags = *arguments.flatten
|
20
|
+
end
|
21
|
+
|
22
|
+
def answer
|
23
|
+
@tags[-1]
|
24
|
+
end
|
25
|
+
|
26
|
+
def to_s
|
27
|
+
[@word.to_s, @tags].flatten.join(' ')
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
end
|
data/lib/crfpp/utilities.rb
CHANGED
@@ -3,9 +3,6 @@ module CRFPP
|
|
3
3
|
|
4
4
|
# Creates a new Model based on a template and training data.
|
5
5
|
#
|
6
|
-
# The data parameter can either be an array of strings or a filename. The
|
7
|
-
# possible options are:
|
8
|
-
#
|
9
6
|
# :threads: False or the number of threads to use (default is 2).
|
10
7
|
#
|
11
8
|
# :algorithm: L1 or L2 (default)
|
@@ -26,14 +23,7 @@ module CRFPP
|
|
26
23
|
def learn(template, data, options = {})
|
27
24
|
options = { :threads => 2, :algorithm => :L2, :cost => 1.0, :frequency => 1}.merge(options)
|
28
25
|
|
29
|
-
|
30
|
-
data = save_data_to_tempfile([data].flatten)
|
31
|
-
temporary = true
|
32
|
-
end
|
33
|
-
|
34
|
-
template = Template.new(template) unless template.is_a?(Template)
|
35
|
-
model = Model.new
|
36
|
-
|
26
|
+
model = Model.new
|
37
27
|
arguments = []
|
38
28
|
|
39
29
|
# TODO check algorithm names
|
@@ -43,16 +33,15 @@ module CRFPP
|
|
43
33
|
arguments << "--thread=#{options[:threads]}"
|
44
34
|
arguments << "--freq=#{options[:frequency]}"
|
45
35
|
|
46
|
-
arguments << template.path
|
47
|
-
arguments << data
|
36
|
+
arguments << (template.respond_to?(:path) ? template.path : template)
|
37
|
+
arguments << (data.respond_to?(:path) ? data.path : data)
|
48
38
|
arguments << model.path
|
49
|
-
|
50
|
-
|
51
|
-
raise NativeError, 'crfpp learn failed' unless success
|
39
|
+
|
40
|
+
Native.learn(arguments.join(' '))
|
52
41
|
|
53
42
|
model
|
54
|
-
|
55
|
-
|
43
|
+
rescue => error
|
44
|
+
raise NativeError, error.message
|
56
45
|
end
|
57
46
|
|
58
47
|
alias train learn
|
data/lib/crfpp/version.rb
CHANGED
data/test/crfpp/test_data.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
module CRFPP
|
4
|
+
|
5
|
+
class TestData < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def test_load_data_from_empty_file
|
8
|
+
file = Tempfile.new('template')
|
9
|
+
assert Data.new(file.path).to_s.empty?
|
10
|
+
ensure
|
11
|
+
file.close
|
12
|
+
file.unlink
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_load_data_from_file
|
16
|
+
path = "#{FixturesRoot}/test.data"
|
17
|
+
assert_equal 'Rockwell NNP B-NP', Data.new(path)[0][0].to_s
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crfpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2011-08-18 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement: &
|
16
|
+
requirement: &2157284480 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.9'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2157284480
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rake-compiler
|
27
|
-
requirement: &
|
27
|
+
requirement: &2157283860 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0.7'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2157283860
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ZenTest
|
38
|
-
requirement: &
|
38
|
+
requirement: &2157283280 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '4.6'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2157283280
|
47
47
|
description: A Ruby extension to interface with CRF++, the Conditional Random Fields
|
48
48
|
library written in C++. You need to install libcrfpp to use this gem.
|
49
49
|
email:
|
@@ -67,14 +67,17 @@ files:
|
|
67
67
|
- ext/crfpp/tagger.cpp
|
68
68
|
- ext/crfpp/tagger.hpp
|
69
69
|
- lib/crfpp.rb
|
70
|
+
- lib/crfpp/data.rb
|
70
71
|
- lib/crfpp/errors.rb
|
71
72
|
- lib/crfpp/feature.rb
|
72
73
|
- lib/crfpp/filelike.rb
|
73
74
|
- lib/crfpp/macro.rb
|
74
75
|
- lib/crfpp/model.rb
|
75
76
|
- lib/crfpp/template.rb
|
77
|
+
- lib/crfpp/token.rb
|
76
78
|
- lib/crfpp/utilities.rb
|
77
79
|
- lib/crfpp/version.rb
|
80
|
+
- test/crfpp/test_data.rb
|
78
81
|
- test/crfpp/test_feature.rb
|
79
82
|
- test/crfpp/test_filelike.rb
|
80
83
|
- test/crfpp/test_macro.rb
|
@@ -119,6 +122,7 @@ signing_key:
|
|
119
122
|
specification_version: 3
|
120
123
|
summary: Conditional Random Fields for Ruby.
|
121
124
|
test_files:
|
125
|
+
- test/crfpp/test_data.rb
|
122
126
|
- test/crfpp/test_feature.rb
|
123
127
|
- test/crfpp/test_filelike.rb
|
124
128
|
- test/crfpp/test_macro.rb
|