pluskid-rmmseg-cpp 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/rmmseg/algor.cpp CHANGED
@@ -81,7 +81,9 @@ namespace rmmseg
81
81
  svwl_filter(chunks);
82
82
  if (chunks.size() > 1)
83
83
  lsdmfocw_filter(chunks);
84
-
84
+
85
+ if (chunks.size() < 1)
86
+ return Token(NULL, 0);
85
87
 
86
88
  Token token(m_text+m_pos, chunks[0].words[0]->nbytes);
87
89
  m_pos += chunks[0].words[0]->nbytes;
@@ -68,7 +68,6 @@ extern "C" {
68
68
 
69
69
  static void tk_mark(Token *t)
70
70
  {
71
- // start and end are Fixnums, no need to mark them
72
71
  rb_gc_mark(t->text);
73
72
  }
74
73
  static void tk_free(Token *t)
@@ -95,9 +94,14 @@ extern "C" {
95
94
  static VALUE cToken;
96
95
  static VALUE tk_create(const char* base, const rmmseg::Token &t)
97
96
  {
98
- Token *tk = (Token *)malloc(sizeof(Token));
97
+ Token *tk = ALLOC(Token);
99
98
  int start = t.text-base;
100
99
  tk->text = rb_str_new(t.text, t.length);
100
+
101
+ // This is necessary, see
102
+ // http://pluskid.lifegoo.com/?p=348
103
+ rb_gc_mark(tk->text);
104
+
101
105
  tk->start = INT2FIX(start);
102
106
  tk->end = INT2FIX(start + t.length);
103
107
  return Data_Wrap_Struct(cToken,
@@ -127,7 +131,7 @@ extern "C" {
127
131
  static VALUE cAlgorithm;
128
132
  static VALUE algor_create(VALUE klass, VALUE text)
129
133
  {
130
- Algorithm *algor = (Algorithm *)malloc(sizeof(Algorithm));
134
+ Algorithm *algor = ALLOC(Algorithm);
131
135
  void *mem;
132
136
  algor->text = text;
133
137
  mem = malloc(sizeof(rmmseg::Algorithm));
data/lib/rmmseg/ferret.rb CHANGED
@@ -42,7 +42,10 @@ module RMMSeg
42
42
  if tok.nil?
43
43
  return nil
44
44
  else
45
- return ::Ferret::Analysis::Token.new(tok.text, tok.start, tok.end)
45
+ @token.text = tok.text
46
+ @token.start = tok.start
47
+ @token.end = tok.end
48
+ return @token
46
49
  end
47
50
  end
48
51
 
@@ -53,6 +56,7 @@ module RMMSeg
53
56
 
54
57
  # Set the text to be tokenized
55
58
  def text=(str)
59
+ @token = ::Ferret::Analysis::Token.new("", 0, 0)
56
60
  @text = str
57
61
  @algor = Algorithm.new(@text)
58
62
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pluskid-rmmseg-cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - pluskid