ferret 0.11.5 → 0.11.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,7 +13,7 @@
13
13
  ****************************************************************************/
14
14
 
15
15
  INLINE Token *tk_set(Token *tk,
16
- char *text, int tlen, int start, int end, int pos_inc)
16
+ char *text, int tlen, off_t start, off_t end, int pos_inc)
17
17
  {
18
18
  if (tlen >= MAX_WORD_SIZE) {
19
19
  tlen = MAX_WORD_SIZE - 1;
@@ -31,16 +31,16 @@ INLINE Token *tk_set_ts(Token *tk,
31
31
  char *start, char *end, char *text, int pos_inc)
32
32
  {
33
33
  return tk_set(tk, start, (int)(end - start),
34
- (int)(start - text), (int)(end - text), pos_inc);
34
+ (off_t)(start - text), (off_t)(end - text), pos_inc);
35
35
  }
36
36
 
37
37
  INLINE Token *tk_set_no_len(Token *tk,
38
- char *text, int start, int end, int pos_inc)
38
+ char *text, off_t start, off_t end, int pos_inc)
39
39
  {
40
40
  return tk_set(tk, text, (int)strlen(text), start, end, pos_inc);
41
41
  }
42
42
 
43
- INLINE Token *w_tk_set(Token *tk, wchar_t *text, int start, int end,
43
+ INLINE Token *w_tk_set(Token *tk, wchar_t *text, off_t start, off_t end,
44
44
  int pos_inc)
45
45
  {
46
46
  int len = wcstombs(tk->text, text, MAX_WORD_SIZE - 1);
@@ -374,8 +374,8 @@ static Token *mb_wst_next_lc(TokenStream *ts)
374
374
  }
375
375
  *w = 0;
376
376
  ts->t = t;
377
- return w_tk_set(&(CTS(ts)->token), wbuf, (int)(start - ts->text),
378
- (int)(t - ts->text), 1);
377
+ return w_tk_set(&(CTS(ts)->token), wbuf, (off_t)(start - ts->text),
378
+ (off_t)(t - ts->text), 1);
379
379
  }
380
380
 
381
381
  TokenStream *mb_whitespace_tokenizer_new(bool lowercase)
@@ -512,8 +512,8 @@ Token *mb_lt_next_lc(TokenStream *ts)
512
512
  }
513
513
  *w = 0;
514
514
  ts->t = t;
515
- return w_tk_set(&(CTS(ts)->token), wbuf, (int)(start - ts->text),
516
- (int)(t - ts->text), 1);
515
+ return w_tk_set(&(CTS(ts)->token), wbuf, (off_t)(start - ts->text),
516
+ (off_t)(t - ts->text), 1);
517
517
  }
518
518
 
519
519
  TokenStream *mb_letter_tokenizer_new(bool lowercase)
@@ -926,8 +926,8 @@ static Token *std_next(TokenStream *ts)
926
926
  }
927
927
  ts->t = t + len;
928
928
  token[len] = 0;
929
- return tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
930
- (int)(ts->t - ts->text), 1);
929
+ return tk_set(&(CTS(ts)->token), token, len, (off_t)(start - ts->text),
930
+ (off_t)(ts->t - ts->text), 1);
931
931
  }
932
932
 
933
933
  /* now see how long a url we can find. */
@@ -974,8 +974,8 @@ static Token *std_next(TokenStream *ts)
974
974
  }
975
975
  }
976
976
  tk_set(&(CTS(ts)->token), token, token_i,
977
- (int)(start - ts->text),
978
- (int)(t - ts->text), 1);
977
+ (off_t)(start - ts->text),
978
+ (off_t)(t - ts->text), 1);
979
979
  }
980
980
  else { /* just return the url as is */
981
981
  tk_set_ts(&(CTS(ts)->token), start, t, ts->text, 1);
@@ -16,16 +16,16 @@ typedef struct Token
16
16
  {
17
17
  char text[MAX_WORD_SIZE];
18
18
  int len;
19
- int start;
20
- int end;
19
+ off_t start;
20
+ off_t end;
21
21
  int pos_inc;
22
22
  } Token;
23
23
 
24
24
  extern Token *tk_new();
25
25
  extern void tk_destroy(void *p);
26
- extern Token *tk_set(Token *tk, char *text, int tlen, int start, int end,
26
+ extern Token *tk_set(Token *tk, char *text, int tlen, off_t start, off_t end,
27
27
  int pos_inc);
28
- extern Token *tk_set_no_len(Token *tk, char *text, int start, int end,
28
+ extern Token *tk_set_no_len(Token *tk, char *text, off_t start, off_t end,
29
29
  int pos_inc);
30
30
  extern int tk_eq(Token *tk1, Token *tk2);
31
31
  extern int tk_cmp(Token *tk1, Token *tk2);
@@ -1450,10 +1450,12 @@ TermVector *fr_read_term_vector(FieldsReader *fr, int field_num)
1450
1450
  if (store_offsets) {
1451
1451
  int num_positions = tv->offset_cnt = is_read_vint(fdt_in);
1452
1452
  Offset *offsets = tv->offsets = ALLOC_N(Offset, num_positions);
1453
- off_t offset = 0;
1453
+ long long offset = 0;
1454
1454
  for (i = 0; i < num_positions; i++) {
1455
- offsets[i].start = offset += is_read_vint(fdt_in);
1456
- offsets[i].end = offset += is_read_vint(fdt_in);
1455
+ offsets[i].start =
1456
+ (off_t)(offset += (long long)is_read_vll(fdt_in));
1457
+ offsets[i].end =
1458
+ (off_t)(offset += (long long)is_read_vll(fdt_in));
1457
1459
  }
1458
1460
  }
1459
1461
  }
@@ -1681,13 +1683,13 @@ void fw_add_postings(FieldsWriter *fw,
1681
1683
 
1682
1684
  if (fi_store_offsets(fi)) {
1683
1685
  /* use delta encoding for offsets */
1684
- int last_end = 0;
1686
+ long long last_end = 0;
1685
1687
  os_write_vint(fdt_out, offset_count); /* write shared prefix length */
1686
1688
  for (i = 0; i < offset_count; i++) {
1687
- off_t start = offsets[i].start;
1688
- off_t end = offsets[i].end;
1689
- os_write_vint(fdt_out, start - last_end);
1690
- os_write_vint(fdt_out, end - start);
1689
+ long long start = (long long)offsets[i].start;
1690
+ long long end = (long long)offsets[i].end;
1691
+ os_write_vll(fdt_out, (unsigned long long)(start - last_end));
1692
+ os_write_vll(fdt_out, (unsigned long long)(end - start));
1691
1693
  last_end = end;
1692
1694
  }
1693
1695
  }
@@ -5197,7 +5199,7 @@ static void dw_add_posting(MemoryPool *mp,
5197
5199
  }
5198
5200
  }
5199
5201
 
5200
- static INLINE void dw_add_offsets(DocWriter *dw, int pos, int start, int end)
5202
+ static INLINE void dw_add_offsets(DocWriter *dw, int pos, off_t start, off_t end)
5201
5203
  {
5202
5204
  if (pos >= dw->offsets_capa) {
5203
5205
  int old_capa = dw->offsets_capa;
@@ -1192,8 +1192,8 @@ static VALUE
1192
1192
  frt_get_tv_offsets(Offset *offset)
1193
1193
  {
1194
1194
  return rb_struct_new(cTVOffsets,
1195
- INT2FIX(offset->start),
1196
- INT2FIX(offset->end),
1195
+ ULL2NUM((unsigned long long)offset->start),
1196
+ ULL2NUM((unsigned long long)offset->end),
1197
1197
  NULL);
1198
1198
  }
1199
1199
 
@@ -403,6 +403,36 @@ INLINE off_t is_read_voff_t(InStream *is)
403
403
  return res;
404
404
  }
405
405
 
406
+ /* optimized to use unchecked read_byte if there is definitely space */
407
+ INLINE unsigned long long is_read_vll(InStream *is)
408
+ {
409
+ register unsigned long long res, b;
410
+ register int shift = 7;
411
+
412
+ if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
413
+ b = is_read_byte(is);
414
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
415
+
416
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
417
+ b = is_read_byte(is);
418
+ res |= (b & 0x7F) << shift;
419
+ shift += 7;
420
+ }
421
+ }
422
+ else { /* unchecked optimization */
423
+ b = read_byte(is);
424
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
425
+
426
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
427
+ b = read_byte(is);
428
+ res |= (b & 0x7F) << shift;
429
+ shift += 7;
430
+ }
431
+ }
432
+
433
+ return res;
434
+ }
435
+
406
436
  INLINE void is_skip_vints(InStream *is, register int cnt)
407
437
  {
408
438
  for (; cnt > 0; cnt--) {
@@ -545,6 +575,25 @@ INLINE void os_write_voff_t(OutStream *os, register off_t num)
545
575
  }
546
576
  }
547
577
 
578
+ /* optimized to use an unchecked write if there is space */
579
+ INLINE void os_write_vll(OutStream *os, register unsigned long long num)
580
+ {
581
+ if (os->buf.pos > VINT_END) {
582
+ while (num > 127) {
583
+ os_write_byte(os, (uchar)((num & 0x7f) | 0x80));
584
+ num >>= 7;
585
+ }
586
+ os_write_byte(os, (uchar)num);
587
+ }
588
+ else {
589
+ while (num > 127) {
590
+ write_byte(os, (uchar)((num & 0x7f) | 0x80));
591
+ num >>= 7;
592
+ }
593
+ write_byte(os, (uchar)num);
594
+ }
595
+ }
596
+
548
597
  void os_write_string(OutStream *os, char *str)
549
598
  {
550
599
  int len = (int)strlen(str);
@@ -176,7 +176,11 @@ struct Store
176
176
  CompoundStore *cmpd; /* for compound_store only */
177
177
  } dir;
178
178
 
179
+ #ifdef POSH_OS_WIN32
180
+ int file_mode;
181
+ #else
179
182
  mode_t file_mode;
183
+ #endif
180
184
  HashSet *locks;
181
185
 
182
186
  /**
@@ -554,6 +558,16 @@ extern void os_write_vint(OutStream *os, register unsigned int num);
554
558
  */
555
559
  extern void os_write_voff_t(OutStream *os, register off_t num);
556
560
 
561
+ /**
562
+ * Write an unsigned long long to OutStream in compressed VINT format.
563
+ * TODO: describe VINT format
564
+ *
565
+ * @param os OutStream to write to
566
+ * @param num the long long to write
567
+ * @raise IO_ERROR if there is an error writing to the file-system
568
+ */
569
+ extern void os_write_vll(OutStream *os, register unsigned long long num);
570
+
557
571
  /**
558
572
  * Write a string to the OutStream. A string is an integer +length+ in VINT
559
573
  * format (see os_write_vint) followed by +length+ bytes. The string can then
@@ -694,6 +708,17 @@ extern INLINE void is_skip_vints(InStream *is, register int cnt);
694
708
  */
695
709
  extern INLINE off_t is_read_voff_t(InStream *is);
696
710
 
711
+ /**
712
+ * Read a compressed (VINT) unsigned long long from the InStream.
713
+ * TODO: describe VINT format
714
+ *
715
+ * @param is the InStream to read from
716
+ * @return a long long
717
+ * @raise IO_ERROR if there is a error reading from the file-system
718
+ * @raise EOF_ERROR if there is an attempt to read past the end of the file
719
+ */
720
+ extern INLINE unsigned long long is_read_vll(InStream *is);
721
+
697
722
  /**
698
723
  * Read a string from the InStream. A string is an integer +length+ in vint
699
724
  * format (see is_read_vint) followed by +length+ bytes. This is the format
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.11.5'
2
+ VERSION = '0.11.6'
3
3
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.11.5
7
- date: 2007-11-17 00:00:00 +11:00
6
+ version: 0.11.6
7
+ date: 2007-11-29 00:00:00 +11:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib