wordtriez 0.1.3 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c086defc8f9d26af3174ab0fe8f6abeec00add65
4
- data.tar.gz: 1093097d455ed7b082c7da626c923a221e41853d
3
+ metadata.gz: 1166b687e599b75c7c6423e177e661e6e01557fc
4
+ data.tar.gz: 7157ecc077b1be637ceb067bae8e170acef367bf
5
5
  SHA512:
6
- metadata.gz: 09bbe2b50bda31e775d17906b4a448e505e8f676bfb84e37d36186afaba706d6e3de0567e49f0ab37440eabcfa6a44176c1ee3a6ba173287022849901a1adeef
7
- data.tar.gz: 46f914ec077291c2f5d7b749799f0179ab42208894622da5fcb4dde360c992082faa747faec6a56a949d735bfbd78f0c6c21ec7e653c253832479ec5b9ec724a
6
+ metadata.gz: a674980638b76c5ad7893897d945986b56634b49055b807cded3bc367a05917a207f7ea92a731692a74a4335580b758ac31398466654b01a3a3e291fe13fea1c
7
+ data.tar.gz: 4c113c405f97f8033a11e0f501c4e3043466ade43717dc762347c5283d5064a00096b1b27f578940ee4a97978a72977a79651755a81c3342c2c5b6059b31cb2c
data/ext/hat-trie/text.c CHANGED
@@ -15,16 +15,16 @@
15
15
  *
16
16
  * Spaces indicate word boundaries, while periods indicate sentence boundaries.
17
17
  */
18
- size_t text_clean(char* text)
18
+ size_t text_clean(char* text, size_t len)
19
19
  {
20
- if (*text == '\0') return 0;
20
+ if (len == 0) return 0;
21
21
 
22
22
  char* read;
23
23
  char* write = text;
24
24
  uint8_t join_lines = false,
25
25
  just_added_space = true, // prevent prefix spaces
26
26
  just_added_period = false;
27
- for (read=text; *read; read++) {
27
+ for (read=text; read<text+len; read++) {
28
28
  char c = *read;
29
29
  if (c >= 'A' && c <= 'Z') {
30
30
  // Change upper case to lowercase
@@ -70,7 +70,7 @@ size_t text_clean(char* text)
70
70
  void add_ngrams(hattrie_t* trie, int upto_n, char* text, size_t text_len, uint8_t incr_existing_keys_only)
71
71
  {
72
72
  char blank_suffix[] = "\0";
73
- add_ngrams_with_suffix(trie, upto_n, text, text_len, blank_suffix, incr_existing_keys_only);
73
+ add_ngrams_with_suffix(trie, upto_n, text, text_len, blank_suffix, 0, incr_existing_keys_only);
74
74
  }
75
75
 
76
76
  static inline void incr_value(
@@ -101,7 +101,7 @@ static inline void incr_value(
101
101
 
102
102
  }
103
103
 
104
- void add_ngrams_with_suffix(hattrie_t* trie, int upto_n, char* text, size_t text_len, char* suffix, uint8_t incr_existing_keys_only)
104
+ void add_ngrams_with_suffix(hattrie_t* trie, int upto_n, char* text, size_t text_len, char* suffix, size_t suffix_len, uint8_t incr_existing_keys_only)
105
105
  {
106
106
  char* head = text;
107
107
  char* tail = text;
@@ -112,10 +112,9 @@ void add_ngrams_with_suffix(hattrie_t* trie, int upto_n, char* text, size_t text
112
112
  if (text_len == 0) return;
113
113
 
114
114
  char buffer[NGRAM_BUFFER_SIZE];
115
- size_t suffix_len = strlen(suffix);
116
115
  size_t buffer_offset = NGRAM_BUFFER_SIZE - suffix_len - 1;
117
116
  char* buffer_pre = buffer + buffer_offset;
118
- strcpy(buffer_pre, suffix);
117
+ memcpy(buffer_pre, suffix, suffix_len);
119
118
 
120
119
  do {
121
120
  if (*tail == ' ' || *tail == '.' || tail >= head+text_len) {
data/ext/hat-trie/text.h CHANGED
@@ -11,9 +11,9 @@ extern "C" {
11
11
 
12
12
  #define NGRAM_BUFFER_SIZE 4096
13
13
 
14
- size_t text_clean(char* text);
14
+ size_t text_clean(char* text, size_t len);
15
15
  void add_ngrams(hattrie_t* trie, int upto_n, char* text, size_t text_len, uint8_t incr_existing_keys_only);
16
- void add_ngrams_with_suffix(hattrie_t* trie, int upto_n, char* text, size_t text_len, char* suffix, uint8_t incr_existing_keys_only);
16
+ void add_ngrams_with_suffix(hattrie_t* trie, int upto_n, char* text, size_t text_len, char* suffix, size_t suffix_len, uint8_t incr_existing_keys_only);
17
17
 
18
18
  #ifdef __cplusplus
19
19
  }
data/ext/wordtriez.cc CHANGED
@@ -276,8 +276,7 @@ static VALUE hat_walk(VALUE self, VALUE key) {
276
276
  static VALUE hat_text_clean(VALUE self, VALUE text) {
277
277
  rb_str_modify(text);
278
278
 
279
- char* ctext = StringValueCStr(text);
280
- size_t new_length = text_clean(ctext);
279
+ size_t new_length = text_clean(RSTRING_PTR(text), RSTRING_LEN(text));
281
280
 
282
281
  rb_str_set_len(text, (long)new_length);
283
282
 
@@ -296,7 +295,8 @@ static VALUE hat_add_text(VALUE self, VALUE text, VALUE ngrams, VALUE suffix, VA
296
295
  FIX2INT(ngrams),
297
296
  RSTRING_PTR(text),
298
297
  RSTRING_LEN(text),
299
- StringValueCStr(suffix),
298
+ RSTRING_PTR(suffix),
299
+ RSTRING_LEN(suffix),
300
300
  RTEST(incr_existing_keys_only));
301
301
 
302
302
  return self;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wordtriez
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zete Lui
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-09-25 00:00:00.000000000 Z
12
+ date: 2014-09-28 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: fast, efficient, unicode aware HAT trie with prefix / suffix support.
15
15
  email: