wapiti 1.0.4 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.md +6 -0
- data/README.md +26 -11
- data/ext/wapiti/native.c +9 -3
- data/lib/wapiti/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1b615815731e51a90b7043b7ad91c97f0386bf99435c78f18ab23cf9df212cc
|
4
|
+
data.tar.gz: ee11a7e6f76f382847f721f8388b9c7f314e46a90ab391b463aa7e60ef641d41
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7852d9566407e93c9d66a641dd8551e3d4666449b9aad01ef434f822c78955d887911e59adb0418751ec4183d1ffa32504ec525fe3b16dbf05018c650c143c2
|
7
|
+
data.tar.gz: '0762885a4ef9265917756d983932da613ebb2cf32c9f18db76afcdac933ae710fe457ba8b431bdf537b986c9c54f7a9908b605627a72e000dd59f6f9cebab034'
|
data/HISTORY.md
CHANGED
@@ -1,6 +1,12 @@
|
|
1
|
+
1.0.5 / 2019-05-13
|
2
|
+
==================
|
3
|
+
* Updated examples in README.md to current API
|
4
|
+
* Fix crash when Model#label(input, check: true) is called on input with new labels
|
5
|
+
|
1
6
|
1.0.3 / 2018-07-10
|
2
7
|
==================
|
3
8
|
* Fix compile time format-security
|
9
|
+
|
4
10
|
1.0.0 / 2017-12-xx
|
5
11
|
==================
|
6
12
|
* Added support for Windows platform
|
data/README.md
CHANGED
@@ -23,9 +23,12 @@ Quickstart
|
|
23
23
|
|
24
24
|
### Creating a Model
|
25
25
|
|
26
|
+
You can run the following examples by starting a Ruby interpreter (irb or pry) inside the spec/fixtures directory.
|
27
|
+
|
26
28
|
Using a pattern and training data stored in a file:
|
27
29
|
|
28
|
-
|
30
|
+
require 'wapiti'
|
31
|
+
model = Wapiti.train('chtrain.txt', pattern: 'chpattern.txt')
|
29
32
|
#=> #<Wapiti::Model:0x0000010188f868>
|
30
33
|
model.labels
|
31
34
|
#=> ["B-ADJP", "B-ADVP", "B-CONJP" ...]
|
@@ -34,11 +37,21 @@ Using a pattern and training data stored in a file:
|
|
34
37
|
|
35
38
|
Alternatively, you can pass in the training data as a `Wapiti::Dataset`;
|
36
39
|
this class supports the default text format used by Wapiti as well as
|
37
|
-
|
40
|
+
additional formats (such as YAML or XML) and an API, to make it easier
|
38
41
|
to manage data sets used for input and training.
|
39
42
|
|
40
|
-
|
41
|
-
|
43
|
+
options = {threads:3, pattern: 'chpattern.txt'}
|
44
|
+
|
45
|
+
data_text = Wapiti::Dataset.open('chtrain.txt',tagged:true)
|
46
|
+
model2= Wapiti.train(data_text,options)
|
47
|
+
model2.labels
|
48
|
+
=> ["B-ADJP", "B-ADVP", "B-CONJP" ...]
|
49
|
+
|
50
|
+
options = {threads:3, pattern: 'chpattern_only_tag.txt'}
|
51
|
+
|
52
|
+
data_xml = Wapiti::Dataset.open('chtrain.xml')
|
53
|
+
#=> #<Wapiti::Dataset sequences={823}>
|
54
|
+
model3 = Wapiti.train(data_xml, options)
|
42
55
|
|
43
56
|
You can consult the `Wapiti::Options.attribute_names` class for a list of
|
44
57
|
supported configuration options and `Wapiti::Options.algorithms` for
|
@@ -64,9 +77,9 @@ Before saving your model you can use `compact` to reduce the model's size:
|
|
64
77
|
|
65
78
|
By calling `#label` on a Model instance you can add labels to a dataset:
|
66
79
|
|
67
|
-
model = Wapiti.load('
|
68
|
-
input = Wapiti::Dataset.
|
69
|
-
output = model.label(input
|
80
|
+
model = Wapiti.load('ch.mod')
|
81
|
+
input = Wapiti::Dataset.open('chtrain.txt',tagged:true)
|
82
|
+
output = model.label(input)
|
70
83
|
|
71
84
|
The result is a new `Wapiti::Dataset` with the predicted labels for each
|
72
85
|
token. If your input data was already tagged, you can compare the input
|
@@ -87,9 +100,11 @@ when calling `#label`), the score for each label will be appended to
|
|
87
100
|
each token/label tuple as a floating point number or passed as a third
|
88
101
|
argument to the passed-in block.
|
89
102
|
|
90
|
-
model.label input, score: true
|
103
|
+
output_with_score = model.label input, score: true
|
91
104
|
# => Dataset where each token will include a score
|
92
|
-
|
105
|
+
output_with_score.first.map(&:score)
|
106
|
+
# => [5.950832716249245, 8.870883529621942, ...]
|
107
|
+
|
93
108
|
### Statistics
|
94
109
|
|
95
110
|
By setting the *:check* option you can tell Wapiti to keep statistics during
|
@@ -105,8 +120,8 @@ are also available through the associated attribute readers).
|
|
105
120
|
|
106
121
|
model.label input, check: true
|
107
122
|
model.stats
|
108
|
-
=> {:token=>{:count=>
|
109
|
-
|
123
|
+
=> {:token=>{:count=>19172, :errors=>36, :rate=>0.18777383684539956},
|
124
|
+
:sequence=>{:count=>823, :errors=>28, :rate=>3.402187120291616}}
|
110
125
|
|
111
126
|
For convenience, you can also use the `#check` method, which
|
112
127
|
will reset the counters, check your input, and return the stats.
|
data/ext/wapiti/native.c
CHANGED
@@ -911,7 +911,6 @@ static VALUE model_labels(VALUE self) {
|
|
911
911
|
qrk_t *lp = model->reader->lbl;
|
912
912
|
|
913
913
|
VALUE labels = rb_ary_new2(Y);
|
914
|
-
|
915
914
|
for (unsigned int i = 0; i < Y; ++i) {
|
916
915
|
rb_ary_push(labels, rb_str_new2(qrk_id2str(lp, i)));
|
917
916
|
}
|
@@ -979,14 +978,21 @@ static VALUE decode_sequence(VALUE self, mdl_t *model, raw_t *raw) {
|
|
979
978
|
}
|
980
979
|
|
981
980
|
// Statistics
|
981
|
+
|
982
982
|
if (model->opt->check) {
|
983
983
|
int err = 0;
|
984
|
+
uint32_t lbl = 0;
|
984
985
|
|
985
986
|
for (t = 0; t < T; ++t) {
|
986
|
-
|
987
|
+
lbl = seq->pos[t].lbl;
|
988
|
+
|
989
|
+
// ((uint32_t)-1) is a magic value for no assigned token
|
990
|
+
if (lbl != ((uint32_t)-1)) {
|
991
|
+
stat[0][lbl]++;
|
992
|
+
}
|
987
993
|
stat[1][out[t * N]]++;
|
988
994
|
|
989
|
-
if (
|
995
|
+
if (lbl != out[t * N]) {
|
990
996
|
terr++;
|
991
997
|
err = 1;
|
992
998
|
} else {
|
data/lib/wapiti/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wapiti
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: builder
|