ferret 0.11.3 → 0.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -131,7 +131,7 @@ file "ext/#{EXT}" => ["ext/Makefile"] do
131
131
  puts
132
132
  puts "**********************************************************************"
133
133
  puts "You may need to call VCVARS32.BAT to set the environment variables."
134
- puts ' c:\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT'
134
+ puts ' "f:\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT"'
135
135
  puts "**********************************************************************"
136
136
  puts
137
137
  raise e
@@ -175,6 +175,9 @@ PKG_FILES = FileList[
175
175
  '[-A-Z]*',
176
176
  'ext/**/*.[ch]',
177
177
  'lib/**/*.rb',
178
+ 'lib/**/*.rhtml',
179
+ 'lib/**/*.css',
180
+ 'lib/**/*.js',
178
181
  'test/**/*.rb',
179
182
  'test/**/wordfile',
180
183
  'rake_utils/**/*.rb',
@@ -205,6 +208,9 @@ else
205
208
  s.extensions << "ext/extconf.rb"
206
209
  s.require_path = 'lib'
207
210
  s.autorequire = 'ferret'
211
+ s.bindir = 'bin'
212
+ s.executables = ['ferret-browser']
213
+ s.default_executable = 'ferret-browser'
208
214
 
209
215
  #### Author and project details.
210
216
  s.author = "David Balmain"
@@ -0,0 +1,79 @@
1
#!/usr/bin/env ruby
#
# Command-line launcher for the Ferret index browser.
#
# Opens the Ferret index at the path given on the command line and starts a
# WEBrick HTTP server that mounts Ferret's browser handler for that index.
#
# Usage: ferret-browser [options] /path/to/index

# Put the sibling lib/ directory on the load path. This must be derived from
# the directory containing this script (dirname), not from its file name.
$: << File.expand_path(File.join(File.dirname(__FILE__), '../lib'))
require 'ferret'
require 'ferret/browser'

require 'optparse'
require 'ostruct'

SERVER_OPTIONS = ['webrick']
conf = OpenStruct.new(:host => '0.0.0.0', :port => 3301)

# The block parameter is deliberately named differently from the outer
# variable so that it does not shadow it.
opts = OptionParser.new do |parser|
  parser.banner = "Usage: #{File.basename($0)} /path/to/index"
  parser.separator ""
  parser.separator "Specific Options:"

  parser.on("-h", "--host HOSTNAME",
            "Host for web server to bind to (default is all IPs)") { |host| conf.host = host }
  # Integer coercion keeps the option the same type as the default port.
  parser.on("-p", "--port NUM", Integer,
            "Port for web server (defaults to #{conf.port})") { |port| conf.port = port }
  parser.on("-s", "--server NAME",
            "Server to force (#{SERVER_OPTIONS.join(', ')}).") { |s| conf.server = s.to_sym }

  parser.separator ""
  parser.separator "Common options:"

  parser.on_tail("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  parser.on_tail("-v", "--version", "Show version") do
    puts Ferret::VERSION
    exit
  end
end

opts.parse! ARGV
# Exactly one positional argument (the index path) is required.
if ARGV.length != 1
  puts opts
  exit 1
end
@path = ARGV[0]

# Load the Ferret index
begin
  @reader = Ferret::Index::IndexReader.new(@path)
rescue Ferret::FileNotFoundError => e
  puts "\033[31mCannot start Ferret. No index exists at \"\033[m" +
       "\033[33m#{@path}\033[m\033[31m\".\033[m"
  exit 1
rescue StandardError => e
  # StandardError rather than Exception so that SystemExit and Interrupt
  # are not swallowed here.
  puts "\033[31mCannot start Ferret.\n\033[m\033[33m#{e.to_s}\033[m"
  exit 1
end

# Fall back to WEBrick when no server was requested.
conf.server = :webrick unless conf.server

case conf.server.to_s
when 'webrick'
  require 'webrick/httpserver'
  require 'ferret/browser/webrick'

  # Mount the static files under /s and the browser handler at the root
  s = WEBrick::HTTPServer.new(:BindAddress => conf.host, :Port => conf.port)
  s.mount "/s", WEBrick::HTTPServlet::FileHandler, Ferret::Browser::Controller::STATIC_DIR, true
  s.mount "/", WEBrick::FerretBrowserHandler, @reader, @path

  # Shut the server down cleanly on Ctrl-C
  trap(:INT) do
    s.shutdown
  end
  s.start
else
  raise "server #{conf.server} not known. Must be one of [#{SERVER_OPTIONS.join(', ')}]"
end
data/ext/analysis.c CHANGED
@@ -161,8 +161,9 @@ __inline int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
161
161
  t++;
162
162
  ZEROSET(state, mbstate_t);
163
163
  num_bytes = (int)mbrtowc(wchr, t, MB_CUR_MAX, state);
164
- } while ((num_bytes < 0) && (*wchr != 0) && (*t != 0));
164
+ } while ((num_bytes < 0) && (*t != 0));
165
165
  num_bytes = t - s;
166
+ if (*t == 0) *wchr = 0;
166
167
  }
167
168
  return num_bytes;
168
169
  }
@@ -1301,12 +1302,14 @@ Token *mb_lcf_next(TokenStream *ts)
1301
1302
  {
1302
1303
  wchar_t wbuf[MAX_WORD_SIZE + 1], *wchr;
1303
1304
  Token *tk = TkFilt(ts)->sub_ts->next(TkFilt(ts)->sub_ts);
1305
+ int x;
1306
+ wbuf[MAX_WORD_SIZE] = 0;
1304
1307
 
1305
1308
  if (tk == NULL) {
1306
1309
  return tk;
1307
1310
  }
1308
1311
 
1309
- if (mbstowcs(wbuf, tk->text, MAX_WORD_SIZE) <= 0) return tk;
1312
+ if ((x=mbstowcs(wbuf, tk->text, MAX_WORD_SIZE)) <= 0) return tk;
1310
1313
  wchr = wbuf;
1311
1314
  while (*wchr != 0) {
1312
1315
  *wchr = towlower(*wchr);
data/ext/config.h CHANGED
@@ -2,6 +2,7 @@
2
2
  #define FRT_DEFINES_H
3
3
 
4
4
  #include <sys/types.h>
5
+ #include <limits.h>
5
6
  #include "posh.h"
6
7
 
7
8
  #ifndef false
@@ -21,7 +22,7 @@ typedef posh_i32_t f_i32;
21
22
  typedef posh_u64_t f_u64;
22
23
  typedef posh_i64_t f_i64;
23
24
 
24
- #if defined(_FILE_OFFSET_BITS) && (_FILE_OFFSET_BITS == 64)
25
+ #if ( LONG_MAX == 2147483647 ) && defined(_FILE_OFFSET_BITS) && (_FILE_OFFSET_BITS == 64)
25
26
  #define F_OFF_T_PFX "ll"
26
27
  #else
27
28
  #define F_OFF_T_PFX "l"
data/ext/ferret.c CHANGED
@@ -65,9 +65,9 @@ void
65
65
  object_add2(void *key, VALUE obj, const char *file, int line)
66
66
  {
67
67
  if (h_get(object_map, key))
68
- printf("failed adding %x to %d; already contains %x. %s:%d\n",
69
- (int)obj, (int)key, (int)h_get(object_map, key), file, line);
70
- //printf("adding %d. now contains %d %s:%d\n", (int)key, ++hash_cnt, file, line);
68
+ printf("failed adding %lx to %ld; already contains %lx. %s:%d\n",
69
+ (long)obj, (long)key, (long)h_get(object_map, key), file, line);
70
+ //printf("adding %ld. now contains %d %s:%d\n", (long)key, ++hash_cnt, file, line);
71
71
  h_set(object_map, key, (void *)obj);
72
72
  }
73
73
 
@@ -76,7 +76,7 @@ void
76
76
  object_set2(void *key, VALUE obj, const char *file, int line)
77
77
  {
78
78
  //if (!h_get(object_map, key))
79
- //printf("adding %d. now contains %d %s:%d\n", (int)key, ++hash_cnt, file, line);
79
+ //printf("adding %ld. now contains %d %s:%d\n", (long)key, ++hash_cnt, file, line);
80
80
  h_set(object_map, key, (void *)obj);
81
81
  }
82
82
 
@@ -85,8 +85,8 @@ void
85
85
  object_del2(void *key, const char *file, int line)
86
86
  {
87
87
  if (object_get(key) == Qnil)
88
- printf("failed deleting %d. %s:%d\n", (int)key, file, line);
89
- //printf("deleting %d. now contains %d, %s:%d\n", (int)key, --hash_cnt, file, line);
88
+ printf("failed deleting %ld. %s:%d\n", (long)key, file, line);
89
+ //printf("deleting %ld. now contains %ld, %s:%d\n", (long)key, --hash_cnt, file, line);
90
90
  h_del(object_map, key);
91
91
  }
92
92
 
@@ -189,6 +189,31 @@ frt_field(VALUE rfield)
189
189
  return NULL;
190
190
  }
191
191
 
192
/*
 * Json Exportation - Loading each LazyDoc and formatting them into json
 * This code is designed to get a VERY FAST json string; the goal was speed,
 * not elegance.
 * Jeremie 'ahFeel' BORDIER
 * ahFeel@rift.Fr
 *
 * json_concat_string appends +field+ to the output position +s+ as a
 * double-quoted JSON string and returns the position just past the closing
 * quote.
 *
 * s     - write position in a caller-owned buffer. No bounds checking is
 *         done here; the caller must guarantee room for up to
 *         2 * strlen(field) + 2 bytes.
 * field - NUL-terminated text to append.
 *
 * Embedded double quotes are written as \" so the surrounding JSON string is
 * not terminated early. No other characters (backslashes, control
 * characters) are escaped, and no NUL terminator is written; the caller
 * continues writing at the returned pointer.
 *
 * The function is defined without an inline specifier: it has external
 * linkage and is called from other translation units, so an external
 * definition must always be emitted.
 */
char *
json_concat_string(char *s, char *field)
{
    *s++ = '"';
    for (; *field != '\0'; field++) {
        if (*field == '"') {
            /* escape the quote so it stays inside the JSON string */
            *s++ = '\\';
        }
        *s++ = *field;
    }
    *s++ = '"';
    return s;
}
216
+
192
217
  static VALUE error_map;
193
218
 
194
219
  VALUE frt_get_error(const char *err_type)
@@ -355,7 +380,7 @@ void Init_ferret_ext(void)
355
380
  cStateError =
356
381
  rb_define_class_under(mFerret, "StateError", rb_eStandardError);
357
382
  cFileNotFoundError =
358
- rb_define_class_under(rb_cObject, "FileNotFoundError", rb_eIOError);
383
+ rb_define_class_under(mFerret, "FileNotFoundError", rb_eIOError);
359
384
 
360
385
  error_map = rb_hash_new();
361
386
  rb_hash_aset(error_map, rb_intern("Exception"), rb_eStandardError);
data/ext/ferret.h CHANGED
@@ -65,6 +65,7 @@ extern VALUE frt_hs_to_rb_ary(HashSet *hs);
65
65
  extern void *frt_rb_data_ptr(VALUE val);
66
66
  extern char * frt_field(VALUE rfield);
67
67
  extern VALUE frt_get_term(const char *field, const char *term);
68
+ extern __inline char *json_concat_string(char *s, char *field);
68
69
  extern char *rs2s(VALUE rstr);
69
70
  extern char *nstrdup(VALUE rstr);
70
71
  #define Frt_Make_Struct(klass)\
data/ext/index.c CHANGED
@@ -6,7 +6,6 @@
6
6
  #include <string.h>
7
7
  #include <limits.h>
8
8
  #include <ctype.h>
9
- #include <unistd.h>
10
9
 
11
10
  #define GET_LOCK(lock, name, store, err_msg) do {\
12
11
  lock = store->open_lock(store, name);\
@@ -117,10 +116,10 @@ char *fn_for_generation(char *buf, char *base, char *ext, f_i64 gen)
117
116
  }
118
117
  }
119
118
 
120
- char *segfn_for_generation(char *buf, int generation)
119
+ char *segfn_for_generation(char *buf, f_u64 generation)
121
120
  {
122
121
  char b[SEGMENT_NAME_MAX_LENGTH];
123
- char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)generation);
122
+ char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, generation);
124
123
  sprintf(buf, SEGMENTS_FILE_NAME"_%s", u);
125
124
  return buf;
126
125
  }
@@ -393,25 +392,36 @@ FieldInfo *fis_by_number(FieldInfos *fis, int num)
393
392
 
394
393
  FieldInfos *fis_read(InStream *is)
395
394
  {
396
- int store_val, index_val, term_vector_val;
397
- int i;
398
- union { f_u32 i; float f; } tmp;
399
- FieldInfo *fi;
400
- FieldInfos *fis;
401
-
402
- store_val = is_read_vint(is);
403
- index_val = is_read_vint(is);
404
- term_vector_val = is_read_vint(is);
405
- fis = fis_new(store_val, index_val, term_vector_val);
406
- for (i = is_read_vint(is); i > 0; i--) {
407
- fi = ALLOC(FieldInfo);
408
- fi->name = is_read_string(is);
409
- tmp.i = is_read_u32(is);
410
- fi->boost = tmp.f;
411
- fi->bits = is_read_vint(is);
412
- fis_add_field(fis, fi);
413
- fi->ref_cnt = 1;
414
- }
395
+ FieldInfos *volatile fis;
396
+ TRY
397
+ do {
398
+ int store_val, index_val, term_vector_val;
399
+ int i;
400
+ union { f_u32 i; float f; } tmp;
401
+ FieldInfo *volatile fi;
402
+
403
+ store_val = is_read_vint(is);
404
+ index_val = is_read_vint(is);
405
+ term_vector_val = is_read_vint(is);
406
+ fis = fis_new(store_val, index_val, term_vector_val);
407
+ for (i = is_read_vint(is); i > 0; i--) {
408
+ fi = ALLOC_AND_ZERO(FieldInfo);
409
+ TRY
410
+ fi->name = is_read_string_safe(is);
411
+ tmp.i = is_read_u32(is);
412
+ fi->boost = tmp.f;
413
+ fi->bits = is_read_vint(is);
414
+ XCATCHALL
415
+ free(fi->name);
416
+ free(fi);
417
+ XENDTRY
418
+ fis_add_field(fis, fi);
419
+ fi->ref_cnt = 1;
420
+ }
421
+ } while (0);
422
+ XCATCHALL
423
+ fis_deref(fis);
424
+ XENDTRY
415
425
 
416
426
  return fis;
417
427
  }
@@ -557,21 +567,26 @@ SegmentInfo *si_new(char *name, int doc_cnt, Store *store)
557
567
 
558
568
  SegmentInfo *si_read(Store *store, InStream *is)
559
569
  {
560
- SegmentInfo *si = ALLOC_AND_ZERO(SegmentInfo);
561
- si->store = store;
562
- si->name = is_read_string(is);
563
- si->doc_cnt = is_read_vint(is);
564
- si->del_gen = is_read_vint(is);
565
- si->norm_gens_size = is_read_vint(is);
566
- si->ref_cnt = 1;
567
- if (0 < si->norm_gens_size) {
568
- int i;
569
- si->norm_gens = ALLOC_N(int, si->norm_gens_size);
570
- for (i = si->norm_gens_size - 1; i >= 0; i--) {
571
- si->norm_gens[i] = is_read_vint(is);
570
+ SegmentInfo *volatile si = ALLOC_AND_ZERO(SegmentInfo);
571
+ TRY
572
+ si->store = store;
573
+ si->name = is_read_string_safe(is);
574
+ si->doc_cnt = is_read_vint(is);
575
+ si->del_gen = is_read_vint(is);
576
+ si->norm_gens_size = is_read_vint(is);
577
+ si->ref_cnt = 1;
578
+ if (0 < si->norm_gens_size) {
579
+ int i;
580
+ si->norm_gens = ALLOC_N(int, si->norm_gens_size);
581
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
582
+ si->norm_gens[i] = is_read_vint(is);
583
+ }
572
584
  }
573
- }
574
- si->use_compound_file = (bool)is_read_byte(is);
585
+ si->use_compound_file = (bool)is_read_byte(is);
586
+ XCATCHALL
587
+ free(si->name);
588
+ free(si);
589
+ XENDTRY
575
590
  return si;
576
591
  }
577
592
 
@@ -4008,17 +4023,23 @@ static void bv_write(BitVector *bv, Store *store, char *name)
4008
4023
  static BitVector *bv_read(Store *store, char *name)
4009
4024
  {
4010
4025
  int i;
4011
- BitVector *bv = ALLOC_AND_ZERO(BitVector);
4012
- InStream *is = store->open_input(store, name);
4026
+ bool success = false;
4027
+ InStream *volatile is = store->open_input(store, name);
4028
+ BitVector *volatile bv = ALLOC_AND_ZERO(BitVector);
4013
4029
  bv->size = (int)is_read_vint(is);
4014
4030
  bv->capa = (bv->size >> 5) + 1;
4015
4031
  bv->bits = ALLOC_AND_ZERO_N(f_u32, bv->capa);
4016
4032
  bv->ref_cnt = 1;
4017
- for (i = (bv->size >> 5); i >= 0; i--) {
4018
- bv->bits[i] = is_read_u32(is);
4019
- }
4020
- is_close(is);
4021
- bv_recount(bv);
4033
+ TRY
4034
+ for (i = (bv->size >> 5); i >= 0; i--) {
4035
+ bv->bits[i] = is_read_u32(is);
4036
+ }
4037
+ bv_recount(bv);
4038
+ success = true;
4039
+ XFINALLY
4040
+ is_close(is);
4041
+ if (!success && bv) bv_destroy(bv);
4042
+ XENDTRY
4022
4043
  return bv;
4023
4044
  }
4024
4045
 
@@ -4297,6 +4318,7 @@ static IndexReader *sr_setup_i(SegmentReader *sr)
4297
4318
  sr->fr_bucket = ary_new();
4298
4319
  }
4299
4320
  XCATCHALL
4321
+ ir->sis = NULL;
4300
4322
  ir_close(ir);
4301
4323
  XENDTRY
4302
4324
 
@@ -4306,10 +4328,9 @@ static IndexReader *sr_setup_i(SegmentReader *sr)
4306
4328
  static IndexReader *sr_open(SegmentInfos *sis, FieldInfos *fis, int si_num,
4307
4329
  bool is_owner)
4308
4330
  {
4309
- IndexReader *ir;
4310
4331
  SegmentReader *sr = ALLOC_AND_ZERO(SegmentReader);
4311
4332
  sr->si = sis->segs[si_num];
4312
- ir = ir_setup(IR(sr), sr->si->store, sis, fis, is_owner);
4333
+ ir_setup(IR(sr), sr->si->store, sis, fis, is_owner);
4313
4334
  return sr_setup_i(sr);
4314
4335
  }
4315
4336
 
@@ -5187,10 +5208,10 @@ HashTable *dw_invert_field(DocWriter *dw,
5187
5208
  if (fld_inv->is_tokenized) {
5188
5209
  Token *tk;
5189
5210
  int pos = -1, num_terms = 0;
5190
- TokenStream *ts = a_get_ts(a, df->name, "");
5191
5211
 
5192
5212
  for (i = 0; i < df_size; i++) {
5193
- ts->reset(ts, df->data[i]);
5213
+ TokenStream *ts = a_get_ts(a, df->name, df->data[i]);
5214
+ /* ts->reset(ts, df->data[i]); no longer being called */
5194
5215
  if (store_offsets) {
5195
5216
  while (NULL != (tk = ts->next(ts))) {
5196
5217
  pos += tk->pos_inc;
@@ -5212,8 +5233,8 @@ HashTable *dw_invert_field(DocWriter *dw,
5212
5233
  }
5213
5234
  }
5214
5235
  }
5236
+ ts_deref(ts);
5215
5237
  }
5216
- ts_deref(ts);
5217
5238
  fld_inv->length = num_terms;
5218
5239
  }
5219
5240
  else {
data/ext/q_boolean.c CHANGED
@@ -1555,18 +1555,25 @@ Query *bq_new(bool coord_disabled)
1555
1555
  return self;
1556
1556
  }
1557
1557
 
1558
+ Query *bq_new_max(bool coord_disabled, int max)
1559
+ {
1560
+ Query *q = bq_new(coord_disabled);
1561
+ BQ(q)->max_clause_cnt = max;
1562
+ return q;
1563
+ }
1564
+
1558
1565
  BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc)
1559
1566
  {
1560
- if (BQ(self)->clause_cnt >= BQ(self)->clause_capa) {
1561
- BQ(self)->clause_capa *= 2;
1562
- REALLOC_N(BQ(self)->clauses, BooleanClause *, BQ(self)->clause_capa);
1563
- }
1564
- if (BQ(self)->clause_cnt > BQ(self)->max_clause_cnt) {
1567
+ if (BQ(self)->clause_cnt >= BQ(self)->max_clause_cnt) {
1565
1568
  RAISE(STATE_ERROR, "Two many clauses. The max clause limit is set to "
1566
1569
  "<%d> but your query has <%d> clauses. You can try increasing "
1567
1570
  ":max_clause_count for the BooleanQuery or using a different "
1568
1571
  "type of query.", BQ(self)->clause_cnt, BQ(self)->max_clause_cnt);
1569
1572
  }
1573
+ if (BQ(self)->clause_cnt >= BQ(self)->clause_capa) {
1574
+ BQ(self)->clause_capa *= 2;
1575
+ REALLOC_N(BQ(self)->clauses, BooleanClause *, BQ(self)->clause_capa);
1576
+ }
1570
1577
  BQ(self)->clauses[BQ(self)->clause_cnt] = bc;
1571
1578
  BQ(self)->clause_cnt++;
1572
1579
  return bc;
@@ -1580,9 +1587,16 @@ BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
1580
1587
 
1581
1588
  BooleanClause *bq_add_query_nr(Query *self, Query *sub_query, enum BC_TYPE occur)
1582
1589
  {
1583
- BooleanClause *bc = bc_new(sub_query, occur);
1590
+ BooleanClause *bc;
1591
+ if (BQ(self)->clause_cnt >= BQ(self)->max_clause_cnt) {
1592
+ RAISE(STATE_ERROR, "Two many clauses. The max clause limit is set to "
1593
+ "<%d> but your query has <%d> clauses. You can try increasing "
1594
+ ":max_clause_count for the BooleanQuery or using a different "
1595
+ "type of query.", BQ(self)->clause_cnt, BQ(self)->max_clause_cnt);
1596
+ }
1597
+ bc = bc_new(sub_query, occur);
1584
1598
  bq_add_clause(self, bc);
1585
- bc_deref(bc); /* bc would have been referenced unnecessarily */
1599
+ bc_deref(bc); /* bc was referenced unnecessarily */
1586
1600
  return bc;
1587
1601
  }
1588
1602