wapiti 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eefd2c624bb02b635f41b9577d303abc352783d2530ef42f3db7f91db2384174
4
- data.tar.gz: 73c766d6e05599b5167743dfc53daf20f74db0af0fe22d3ea6a947e9882189ab
3
+ metadata.gz: d1b615815731e51a90b7043b7ad91c97f0386bf99435c78f18ab23cf9df212cc
4
+ data.tar.gz: ee11a7e6f76f382847f721f8388b9c7f314e46a90ab391b463aa7e60ef641d41
5
5
  SHA512:
6
- metadata.gz: 012c48b99ce4d6af1223f97fd03308f5af0c833e4db250842546b7f75de830fa700f71d52043fdd6c2b54f1f24cc83c0a0929211bf23153113d6efadf619fb68
7
- data.tar.gz: 1a78e6de9025f6f6e199ff70e0973a7aeb42beda218220f3c63801a8239cd5371bf4546796db82134646d4491fa9cf00072371cfe058a4632ab3946b621a32bd
6
+ metadata.gz: a7852d9566407e93c9d66a641dd8551e3d4666449b9aad01ef434f822c78955d887911e59adb0418751ec4183d1ffa32504ec525fe3b16dbf05018c650c143c2
7
+ data.tar.gz: '0762885a4ef9265917756d983932da613ebb2cf32c9f18db76afcdac933ae710fe457ba8b431bdf537b986c9c54f7a9908b605627a72e000dd59f6f9cebab034'
data/HISTORY.md CHANGED
@@ -1,6 +1,12 @@
1
+ 1.0.5 / 2019-05-13
2
+ ==================
3
+ * Updated examples in README.md to current API
4
+ * Fix crash when Model#label(input, check: true) is called on input with new labels
5
+
1
6
  1.0.3 / 2018-07-10
2
7
  ==================
3
8
  * Fix compile time format-security
9
+
4
10
  1.0.0 / 2017-12-xx
5
11
  ==================
6
12
  * Added support for Windows platform
data/README.md CHANGED
@@ -23,9 +23,12 @@ Quickstart
23
23
 
24
24
  ### Creating a Model
25
25
 
26
+ You can run the following examples by starting a Ruby interpreter (irb or pry) inside the spec/fixtures directory.
27
+
26
28
  Using a pattern and training data stored in a file:
27
29
 
28
- model = Wapiti.train('train.txt', pattern: 'pattern.txt')
30
+ require 'wapiti'
31
+ model = Wapiti.train('chtrain.txt', pattern: 'chpattern.txt')
29
32
  #=> #<Wapiti::Model:0x0000010188f868>
30
33
  model.labels
31
34
  #=> ["B-ADJP", "B-ADVP", "B-CONJP" ...]
@@ -34,11 +37,21 @@ Using a pattern and training data stored in a file:
34
37
 
35
38
  Alternatively, you can pass in the training data as a `Wapiti::Dataset`;
36
39
  this class supports the default text format used by Wapiti as well as
37
- additiional formats (such as YAML or XML) and an API to make it easier
40
+ additional formats (such as YAML or XML) and an API to make it easier
38
41
  to manage data sets used for input and training.
39
42
 
40
- data = Wapiti::Dataset.open('chtrain.xml')
41
- model = Wapiti.train(data, options)
43
+ options = {threads:3, pattern: 'chpattern.txt'}
44
+
45
+ data_text = Wapiti::Dataset.open('chtrain.txt',tagged:true)
46
+ model2= Wapiti.train(data_text,options)
47
+ model2.labels
48
+ => ["B-ADJP", "B-ADVP", "B-CONJP" ...]
49
+
50
+ options = {threads:3, pattern: 'chpattern_only_tag.txt'}
51
+
52
+ data_xml = Wapiti::Dataset.open('chtrain.xml')
53
+ #=> #<Wapiti::Dataset sequences={823}>
54
+ model3 = Wapiti.train(data_xml, options)
42
55
 
43
56
  You can consult the `Wapiti::Options.attribute_names` class for a list of
44
57
  supported configuration options and `Wapiti::Options.algorithms` for
@@ -64,9 +77,9 @@ Before saving your model you can use `compact` to reduce the model's size:
64
77
 
65
78
  By calling `#label` on a Model instance you can add labels to a dataset:
66
79
 
67
- model = Wapiti.load('m2.mod')
68
- input = Wapiti::Dataset.load('chtest.txt')
69
- output = model.label(input, tagged: true)
80
+ model = Wapiti.load('ch.mod')
81
+ input = Wapiti::Dataset.open('chtrain.txt',tagged:true)
82
+ output = model.label(input)
70
83
 
71
84
  The result is a new `Wapiti::Dataset` with the predicted labels for each
72
85
  token. If your input data was already tagged, you can compare the input
@@ -87,9 +100,11 @@ when calling `#label`), the score for each label will be appended to
87
100
  each token/label tuple as a floating point number or passed as a third
88
101
  argument to the passed-in block.
89
102
 
90
- model.label input, score: true
103
+ output_with_score = model.label input, score: true
91
104
  # => Dataset where each token will include a score
92
-
105
+ output_with_score.first.map(&:score)
106
+ # => [5.950832716249245, 8.870883529621942, ...]
107
+
93
108
  ### Statistics
94
109
 
95
110
  By setting the *:check* option you can tell Wapiti to keep statistics during
@@ -105,8 +120,8 @@ are also available through the associated attribute readers).
105
120
 
106
121
  model.label input, check: true
107
122
  model.stats
108
- => {:token=>{:count=>1896, :errors=>137, :rate=>7.225738396624472},
109
- :sequence=>{:count=>77, :errors=>50, :rate=>64.93506493506494}}
123
+ => {:token=>{:count=>19172, :errors=>36, :rate=>0.18777383684539956},
124
+ :sequence=>{:count=>823, :errors=>28, :rate=>3.402187120291616}}
110
125
 
111
126
  For convenience, you can also use the `#check` method, which
112
127
  will reset the counters, check your input, and return the stats.
@@ -911,7 +911,6 @@ static VALUE model_labels(VALUE self) {
911
911
  qrk_t *lp = model->reader->lbl;
912
912
 
913
913
  VALUE labels = rb_ary_new2(Y);
914
-
915
914
  for (unsigned int i = 0; i < Y; ++i) {
916
915
  rb_ary_push(labels, rb_str_new2(qrk_id2str(lp, i)));
917
916
  }
@@ -979,14 +978,21 @@ static VALUE decode_sequence(VALUE self, mdl_t *model, raw_t *raw) {
979
978
  }
980
979
 
981
980
  // Statistics
981
+
982
982
  if (model->opt->check) {
983
983
  int err = 0;
984
+ uint32_t lbl = 0;
984
985
 
985
986
  for (t = 0; t < T; ++t) {
986
- stat[0][seq->pos[t].lbl]++;
987
+ lbl = seq->pos[t].lbl;
988
+
989
+ // ((uint32_t)-1) is a magic value for no assigned token
990
+ if (lbl != ((uint32_t)-1)) {
991
+ stat[0][lbl]++;
992
+ }
987
993
  stat[1][out[t * N]]++;
988
994
 
989
- if (seq->pos[t].lbl != out[t * N]) {
995
+ if (lbl != out[t * N]) {
990
996
  terr++;
991
997
  err = 1;
992
998
  } else {
@@ -1,3 +1,3 @@
1
1
  module Wapiti
2
- VERSION = '1.0.4'.freeze
2
+ VERSION = '1.0.5'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wapiti
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-14 00:00:00.000000000 Z
11
+ date: 2019-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: builder