wapiti 1.0.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -7
- data/lib/wapiti/dataset.rb +2 -2
- data/lib/wapiti/log.rb +5 -5
- data/lib/wapiti/model.rb +4 -4
- data/lib/wapiti/sequence.rb +2 -2
- data/lib/wapiti/token.rb +2 -2
- data/lib/wapiti/version.rb +1 -1
- metadata +20 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b74106d7614faa9118a4eb41c4c93c83529811262925e5dbc4938494b5f5ea2
|
4
|
+
data.tar.gz: 99f234680ae7c6c671e82dc6b2c4ae585056dc2e921ed795e2307ee7acde9bee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b10b669fb51366ee7ebf3171d2b4ecb2d16f839864023ea96f87ca6536341556eef3078869636567623237ae3e537bd0be6186f62c3d9ce70f08aa9283fde994
|
7
|
+
data.tar.gz: 0774e1613e495969be07685fe426002a6992a9bb1da91d34b0a2716bad4486dc061a1a9dc9e70fa0ce0ed5434241fdca871ae77e8209eda7e7edd42874a3fb48
|
data/README.md
CHANGED
@@ -24,7 +24,7 @@ Quickstart
|
|
24
24
|
### Creating a Model
|
25
25
|
|
26
26
|
You can run the following examples by starting the Ruby interpreter (irb or pry) inside the spec/fixtures directory.
|
27
|
-
|
27
|
+
|
28
28
|
Using a pattern and training data stored in a file:
|
29
29
|
|
30
30
|
require 'wapiti'
|
@@ -40,15 +40,15 @@ this class supports the default text format used by Wapiti as well as
|
|
40
40
|
additional formats (such as YAML or XML) and an API, to make it easier
|
41
41
|
to manage data sets used for input and training.
|
42
42
|
|
43
|
-
options = {threads:3, pattern: 'chpattern.txt'}
|
44
|
-
|
43
|
+
options = {threads:3, pattern: 'chpattern.txt'}
|
44
|
+
|
45
45
|
data_text = Wapiti::Dataset.open('chtrain.txt',tagged:true)
|
46
46
|
model2= Wapiti.train(data_text,options)
|
47
47
|
model2.labels
|
48
48
|
=> ["B-ADJP", "B-ADVP", "B-CONJP" ...]
|
49
49
|
|
50
|
-
options = {threads:3, pattern: 'chpattern_only_tag.txt'}
|
51
|
-
|
50
|
+
options = {threads:3, pattern: 'chpattern_only_tag.txt'}
|
51
|
+
|
52
52
|
data_xml = Wapiti::Dataset.open('chtrain.xml')
|
53
53
|
#=> #<Wapiti::Dataset sequences={823}>
|
54
54
|
model3 = Wapiti.train(data_xml, options)
|
@@ -104,7 +104,7 @@ argument to the passed-in block.
|
|
104
104
|
# => Dataset where each token will include a score
|
105
105
|
output_with_score.first.map(&:score)
|
106
106
|
# => [5.950832716249245, 8.870883529621942, ...]
|
107
|
-
|
107
|
+
|
108
108
|
### Statistics
|
109
109
|
|
110
110
|
By setting the *:check* option you can tell Wapiti to keep statistics during
|
@@ -142,7 +142,7 @@ example, fix the bug and submit a pull request.
|
|
142
142
|
|
143
143
|
License
|
144
144
|
-------
|
145
|
-
Copyright 2011-
|
145
|
+
Copyright 2011-2020 Sylvester Keil. All rights reserved.
|
146
146
|
|
147
147
|
Copyright 2009-2013 CNRS. All rights reserved.
|
148
148
|
|
data/lib/wapiti/dataset.rb
CHANGED
data/lib/wapiti/log.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
module Wapiti
|
2
2
|
require 'logger'
|
3
3
|
|
4
|
-
Logger = ::Logger.new(STDERR,
|
5
|
-
:
|
6
|
-
:
|
7
|
-
:
|
4
|
+
Logger = ::Logger.new(STDERR,
|
5
|
+
level: ENV['WAPITI_LOG_LEVEL'] || :error,
|
6
|
+
progname: 'wapiti',
|
7
|
+
formatter: proc { |level, time, name, msg|
|
8
8
|
"#{level} [#{time}] #{name}: #{msg}\n"
|
9
9
|
}
|
10
|
-
|
10
|
+
)
|
11
11
|
|
12
12
|
class << self
|
13
13
|
def log
|
data/lib/wapiti/model.rb
CHANGED
@@ -30,7 +30,7 @@ module Wapiti
|
|
30
30
|
|
31
31
|
alias native_label label
|
32
32
|
|
33
|
-
def label(input, opts = nil)
|
33
|
+
def label(input, opts = nil, &block)
|
34
34
|
unless opts.nil?
|
35
35
|
original_options = options.attributes(opts.keys)
|
36
36
|
options.update!(opts)
|
@@ -39,7 +39,7 @@ module Wapiti
|
|
39
39
|
input = input.to_a(tagged: options.check) if input.is_a?(Dataset)
|
40
40
|
|
41
41
|
if block_given?
|
42
|
-
output = native_label(input, &
|
42
|
+
output = native_label(input, &block)
|
43
43
|
else
|
44
44
|
output = native_label(input)
|
45
45
|
end
|
@@ -61,14 +61,14 @@ module Wapiti
|
|
61
61
|
|
62
62
|
alias native_train train
|
63
63
|
|
64
|
-
def train(tdat, ddat = nil, opts = nil)
|
64
|
+
def train(tdat, ddat = nil, opts = nil, &block)
|
65
65
|
options.update!(opts) unless opts.nil?
|
66
66
|
|
67
67
|
tdat = tdat.to_a(tagged: true) if tdat.is_a?(Dataset)
|
68
68
|
ddat = ddat.to_a(tagged: true) if ddat.is_a?(Dataset)
|
69
69
|
|
70
70
|
if block_given?
|
71
|
-
native_train(tdat, ddat, &
|
71
|
+
native_train(tdat, ddat, &block)
|
72
72
|
else
|
73
73
|
native_train(tdat, ddat)
|
74
74
|
end
|
data/lib/wapiti/sequence.rb
CHANGED
data/lib/wapiti/token.rb
CHANGED
@@ -9,10 +9,10 @@ module Wapiti
|
|
9
9
|
class << self
|
10
10
|
def parse(string, spacer: /\s+/, tagged: false, **opts)
|
11
11
|
value, *observations = string.split(spacer)
|
12
|
-
new(value,
|
12
|
+
new(value,
|
13
13
|
label: (tagged ? observations.pop : nil).to_s,
|
14
14
|
observations: observations
|
15
|
-
|
15
|
+
)
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
data/lib/wapiti/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wapiti
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: builder
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '3.2'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rexml
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.0'
|
27
41
|
description: This gem provides a Ruby API for Conditional Random Fields (CRF).
|
28
42
|
email:
|
29
43
|
- sylvester@keil.or.at
|
@@ -85,7 +99,7 @@ homepage: https://github.com/inukshuk/wapiti-ruby
|
|
85
99
|
licenses:
|
86
100
|
- BSD-2-Clause
|
87
101
|
metadata: {}
|
88
|
-
post_install_message:
|
102
|
+
post_install_message:
|
89
103
|
rdoc_options:
|
90
104
|
- "--line-numbers"
|
91
105
|
- "--inline-source"
|
@@ -107,9 +121,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
121
|
- !ruby/object:Gem::Version
|
108
122
|
version: '0'
|
109
123
|
requirements: []
|
110
|
-
|
111
|
-
|
112
|
-
signing_key:
|
124
|
+
rubygems_version: 3.2.15
|
125
|
+
signing_key:
|
113
126
|
specification_version: 4
|
114
127
|
summary: Wicked fast Conditional Random Fields for Ruby.
|
115
128
|
test_files: []
|