wordtriez 0.1.3 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c086defc8f9d26af3174ab0fe8f6abeec00add65
4
- data.tar.gz: 1093097d455ed7b082c7da626c923a221e41853d
3
+ metadata.gz: 1166b687e599b75c7c6423e177e661e6e01557fc
4
+ data.tar.gz: 7157ecc077b1be637ceb067bae8e170acef367bf
5
5
  SHA512:
6
- metadata.gz: 09bbe2b50bda31e775d17906b4a448e505e8f676bfb84e37d36186afaba706d6e3de0567e49f0ab37440eabcfa6a44176c1ee3a6ba173287022849901a1adeef
7
- data.tar.gz: 46f914ec077291c2f5d7b749799f0179ab42208894622da5fcb4dde360c992082faa747faec6a56a949d735bfbd78f0c6c21ec7e653c253832479ec5b9ec724a
6
+ metadata.gz: a674980638b76c5ad7893897d945986b56634b49055b807cded3bc367a05917a207f7ea92a731692a74a4335580b758ac31398466654b01a3a3e291fe13fea1c
7
+ data.tar.gz: 4c113c405f97f8033a11e0f501c4e3043466ade43717dc762347c5283d5064a00096b1b27f578940ee4a97978a72977a79651755a81c3342c2c5b6059b31cb2c
data/ext/hat-trie/text.c CHANGED
@@ -15,16 +15,16 @@
15
15
  *
16
16
  * Spaces indicate word boundaries, while periods indicate sentence boundaries.
17
17
  */
18
- size_t text_clean(char* text)
18
+ size_t text_clean(char* text, size_t len)
19
19
  {
20
- if (*text == '\0') return 0;
20
+ if (len == 0) return 0;
21
21
 
22
22
  char* read;
23
23
  char* write = text;
24
24
  uint8_t join_lines = false,
25
25
  just_added_space = true, // prevent prefix spaces
26
26
  just_added_period = false;
27
- for (read=text; *read; read++) {
27
+ for (read=text; read<text+len; read++) {
28
28
  char c = *read;
29
29
  if (c >= 'A' && c <= 'Z') {
30
30
  // Change upper case to lowercase
@@ -70,7 +70,7 @@ size_t text_clean(char* text)
70
70
  void add_ngrams(hattrie_t* trie, int upto_n, char* text, size_t text_len, uint8_t incr_existing_keys_only)
71
71
  {
72
72
  char blank_suffix[] = "\0";
73
- add_ngrams_with_suffix(trie, upto_n, text, text_len, blank_suffix, incr_existing_keys_only);
73
+ add_ngrams_with_suffix(trie, upto_n, text, text_len, blank_suffix, 0, incr_existing_keys_only);
74
74
  }
75
75
 
76
76
  static inline void incr_value(
@@ -101,7 +101,7 @@ static inline void incr_value(
101
101
 
102
102
  }
103
103
 
104
- void add_ngrams_with_suffix(hattrie_t* trie, int upto_n, char* text, size_t text_len, char* suffix, uint8_t incr_existing_keys_only)
104
+ void add_ngrams_with_suffix(hattrie_t* trie, int upto_n, char* text, size_t text_len, char* suffix, size_t suffix_len, uint8_t incr_existing_keys_only)
105
105
  {
106
106
  char* head = text;
107
107
  char* tail = text;
@@ -112,10 +112,9 @@ void add_ngrams_with_suffix(hattrie_t* trie, int upto_n, char* text, size_t text
112
112
  if (text_len == 0) return;
113
113
 
114
114
  char buffer[NGRAM_BUFFER_SIZE];
115
- size_t suffix_len = strlen(suffix);
116
115
  size_t buffer_offset = NGRAM_BUFFER_SIZE - suffix_len - 1;
117
116
  char* buffer_pre = buffer + buffer_offset;
118
- strcpy(buffer_pre, suffix);
117
+ memcpy(buffer_pre, suffix, suffix_len);
119
118
 
120
119
  do {
121
120
  if (*tail == ' ' || *tail == '.' || tail >= head+text_len) {
data/ext/hat-trie/text.h CHANGED
@@ -11,9 +11,9 @@ extern "C" {
11
11
 
12
12
  #define NGRAM_BUFFER_SIZE 4096
13
13
 
14
- size_t text_clean(char* text);
14
+ size_t text_clean(char* text, size_t len);
15
15
  void add_ngrams(hattrie_t* trie, int upto_n, char* text, size_t text_len, uint8_t incr_existing_keys_only);
16
- void add_ngrams_with_suffix(hattrie_t* trie, int upto_n, char* text, size_t text_len, char* suffix, uint8_t incr_existing_keys_only);
16
+ void add_ngrams_with_suffix(hattrie_t* trie, int upto_n, char* text, size_t text_len, char* suffix, size_t suffix_len, uint8_t incr_existing_keys_only);
17
17
 
18
18
  #ifdef __cplusplus
19
19
  }
data/ext/wordtriez.cc CHANGED
@@ -276,8 +276,7 @@ static VALUE hat_walk(VALUE self, VALUE key) {
276
276
  static VALUE hat_text_clean(VALUE self, VALUE text) {
277
277
  rb_str_modify(text);
278
278
 
279
- char* ctext = StringValueCStr(text);
280
- size_t new_length = text_clean(ctext);
279
+ size_t new_length = text_clean(RSTRING_PTR(text), RSTRING_LEN(text));
281
280
 
282
281
  rb_str_set_len(text, (long)new_length);
283
282
 
@@ -296,7 +295,8 @@ static VALUE hat_add_text(VALUE self, VALUE text, VALUE ngrams, VALUE suffix, VA
296
295
  FIX2INT(ngrams),
297
296
  RSTRING_PTR(text),
298
297
  RSTRING_LEN(text),
299
- StringValueCStr(suffix),
298
+ RSTRING_PTR(suffix),
299
+ RSTRING_LEN(suffix),
300
300
  RTEST(incr_existing_keys_only));
301
301
 
302
302
  return self;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wordtriez
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-09-25 00:00:00.000000000 Z
12
+ date: 2014-09-28 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: fast, efficient, unicode aware HAT trie with prefix / suffix support.
15
15
  email: