ferret 0.11.3 → 0.11.4

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -131,7 +131,7 @@ file "ext/#{EXT}" => ["ext/Makefile"] do
131
131
  puts
132
132
  puts "**********************************************************************"
133
133
  puts "You may need to call VCVARS32.BAT to set the environment variables."
134
- puts ' c:\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT'
134
+ puts ' "f:\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT"'
135
135
  puts "**********************************************************************"
136
136
  puts
137
137
  raise e
@@ -175,6 +175,9 @@ PKG_FILES = FileList[
175
175
  '[-A-Z]*',
176
176
  'ext/**/*.[ch]',
177
177
  'lib/**/*.rb',
178
+ 'lib/**/*.rhtml',
179
+ 'lib/**/*.css',
180
+ 'lib/**/*.js',
178
181
  'test/**/*.rb',
179
182
  'test/**/wordfile',
180
183
  'rake_utils/**/*.rb',
@@ -205,6 +208,9 @@ else
205
208
  s.extensions << "ext/extconf.rb"
206
209
  s.require_path = 'lib'
207
210
  s.autorequire = 'ferret'
211
+ s.bindir = 'bin'
212
+ s.executables = ['ferret-browser']
213
+ s.default_executable = 'ferret-browser'
208
214
 
209
215
  #### Author and project details.
210
216
  s.author = "David Balmain"
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $: << File.expand_path(File.join(File.dirname(__FILE__), '../lib')) # dirname, not basename: locate lib/ relative to this script's directory rather than the current working directory
4
+ require 'ferret'
5
+ require 'ferret/browser'
6
+
7
+ require 'optparse'
8
+ require 'ostruct'
9
+
10
+ SERVER_OPTIONS = ['webrick']
11
+ conf = OpenStruct.new(:host => '0.0.0.0', :port => 3301)
12
+
13
+ opts = OptionParser.new do |opts|
14
+ opts.banner = "Usage: #{File.basename($0)} /path/to/index"
15
+ opts.separator ""
16
+ opts.separator "Specific Options:"
17
+
18
+ opts.on("-h", "--host HOSTNAME",
19
+ "Host for web server to bind to (default is all IPs)") { |conf.host| }
20
+ opts.on("-p", "--port NUM",
21
+ "Port for web server (defaults to #{conf.port})") { |conf.port| }
22
+ opts.on("-s", "--server NAME",
23
+ "Server to force (#{SERVER_OPTIONS.join(', ')}).") { |s| conf.server = s.to_sym }
24
+
25
+ opts.separator ""
26
+ opts.separator "Common options:"
27
+
28
+ opts.on_tail("-?", "--help", "Show this message") do
29
+ puts opts
30
+ exit
31
+ end
32
+
33
+ opts.on_tail("-v", "--version", "Show version") do
34
+ puts Ferret::VERSION
35
+ exit
36
+ end
37
+ end
38
+
39
+ opts.parse! ARGV
40
+ if ARGV.length != 1
41
+ puts opts
42
+ exit
43
+ end
44
+ @path = ARGV[0]
45
+
46
+ # Load the Ferret index
47
+ begin
48
+ @reader = Ferret::Index::IndexReader.new(@path)
49
+ rescue Ferret::FileNotFoundError => e
50
+ puts "\033[31mCannot start Ferret. No index exists at \"\033[m" +
51
+ "\033[33m#{@path}\033[m\033[31m\".\033[m"
52
+ exit
53
+ rescue Exception => e
54
+ puts "\033[31mCannot start Ferret.\n\033[m\033[33m#{e.to_s}\033[m" # trailing \033[m (ESC) resets the terminal colour; the previous \031 was not an escape character
55
+ exit
56
+ end
57
+
58
+ unless conf.server
59
+ conf.server = :webrick
60
+ end
61
+
62
+ case conf.server.to_s
63
+ when 'webrick'
64
+ require 'webrick/httpserver'
65
+ require 'ferret/browser/webrick'
66
+
67
+ # Mount the root
68
+ s = WEBrick::HTTPServer.new(:BindAddress => conf.host, :Port => conf.port)
69
+ s.mount "/s", WEBrick::HTTPServlet::FileHandler, Ferret::Browser::Controller::STATIC_DIR, true
70
+ s.mount "/", WEBrick::FerretBrowserHandler, @reader, @path
71
+
72
+ # Server up
73
+ trap(:INT) do
74
+ s.shutdown
75
+ end
76
+ s.start
77
+ else
78
+ raise "server #{conf.server} not known. Must be one of [#{SERVER_OPTIONS.join(', ')}]"
79
+ end
data/ext/analysis.c CHANGED
@@ -161,8 +161,9 @@ __inline int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
161
161
  t++;
162
162
  ZEROSET(state, mbstate_t);
163
163
  num_bytes = (int)mbrtowc(wchr, t, MB_CUR_MAX, state);
164
- } while ((num_bytes < 0) && (*wchr != 0) && (*t != 0));
164
+ } while ((num_bytes < 0) && (*t != 0));
165
165
  num_bytes = t - s;
166
+ if (*t == 0) *wchr = 0;
166
167
  }
167
168
  return num_bytes;
168
169
  }
@@ -1301,12 +1302,14 @@ Token *mb_lcf_next(TokenStream *ts)
1301
1302
  {
1302
1303
  wchar_t wbuf[MAX_WORD_SIZE + 1], *wchr;
1303
1304
  Token *tk = TkFilt(ts)->sub_ts->next(TkFilt(ts)->sub_ts);
1305
+ int x;
1306
+ wbuf[MAX_WORD_SIZE] = 0;
1304
1307
 
1305
1308
  if (tk == NULL) {
1306
1309
  return tk;
1307
1310
  }
1308
1311
 
1309
- if (mbstowcs(wbuf, tk->text, MAX_WORD_SIZE) <= 0) return tk;
1312
+ if ((x=mbstowcs(wbuf, tk->text, MAX_WORD_SIZE)) <= 0) return tk;
1310
1313
  wchr = wbuf;
1311
1314
  while (*wchr != 0) {
1312
1315
  *wchr = towlower(*wchr);
data/ext/config.h CHANGED
@@ -2,6 +2,7 @@
2
2
  #define FRT_DEFINES_H
3
3
 
4
4
  #include <sys/types.h>
5
+ #include <limits.h>
5
6
  #include "posh.h"
6
7
 
7
8
  #ifndef false
@@ -21,7 +22,7 @@ typedef posh_i32_t f_i32;
21
22
  typedef posh_u64_t f_u64;
22
23
  typedef posh_i64_t f_i64;
23
24
 
24
- #if defined(_FILE_OFFSET_BITS) && (_FILE_OFFSET_BITS == 64)
25
+ #if ( LONG_MAX == 2147483647 ) && defined(_FILE_OFFSET_BITS) && (_FILE_OFFSET_BITS == 64)
25
26
  #define F_OFF_T_PFX "ll"
26
27
  #else
27
28
  #define F_OFF_T_PFX "l"
data/ext/ferret.c CHANGED
@@ -65,9 +65,9 @@ void
65
65
  object_add2(void *key, VALUE obj, const char *file, int line)
66
66
  {
67
67
  if (h_get(object_map, key))
68
- printf("failed adding %x to %d; already contains %x. %s:%d\n",
69
- (int)obj, (int)key, (int)h_get(object_map, key), file, line);
70
- //printf("adding %d. now contains %d %s:%d\n", (int)key, ++hash_cnt, file, line);
68
+ printf("failed adding %lx to %ld; already contains %lx. %s:%d\n",
69
+ (long)obj, (long)key, (long)h_get(object_map, key), file, line);
70
+ //printf("adding %ld. now contains %d %s:%d\n", (long)key, ++hash_cnt, file, line);
71
71
  h_set(object_map, key, (void *)obj);
72
72
  }
73
73
 
@@ -76,7 +76,7 @@ void
76
76
  object_set2(void *key, VALUE obj, const char *file, int line)
77
77
  {
78
78
  //if (!h_get(object_map, key))
79
- //printf("adding %d. now contains %d %s:%d\n", (int)key, ++hash_cnt, file, line);
79
+ //printf("adding %ld. now contains %d %s:%d\n", (long)key, ++hash_cnt, file, line);
80
80
  h_set(object_map, key, (void *)obj);
81
81
  }
82
82
 
@@ -85,8 +85,8 @@ void
85
85
  object_del2(void *key, const char *file, int line)
86
86
  {
87
87
  if (object_get(key) == Qnil)
88
- printf("failed deleting %d. %s:%d\n", (int)key, file, line);
89
- //printf("deleting %d. now contains %d, %s:%d\n", (int)key, --hash_cnt, file, line);
88
+ printf("failed deleting %ld. %s:%d\n", (long)key, file, line);
89
+ //printf("deleting %ld. now contains %ld, %s:%d\n", (long)key, --hash_cnt, file, line);
90
90
  h_del(object_map, key);
91
91
  }
92
92
 
@@ -189,6 +189,31 @@ frt_field(VALUE rfield)
189
189
  return NULL;
190
190
  }
191
191
 
192
+ /*
193
+ * Json Exportation - Loading each LazyDoc and formatting them into json
194
+ * This code is designed to get a VERY FAST json string, the goal was speed,
195
+ * not sexiness.
196
+ * Jeremie 'ahFeel' BORDIER
197
+ * ahFeel@rift.Fr
198
+ */
199
+ __inline char *
200
+ json_concat_string(char *s, char *field)
201
+ {
202
+ *(s++) = '"';
203
+ while (*field) {
204
+ if (*field == '"') {
205
+ *(s++) = '\'';
206
+ *(s++) = *(field++);
207
+ *(s++) = '\'';
208
+ }
209
+ else {
210
+ *(s++) = *(field++);
211
+ }
212
+ }
213
+ *(s++) = '"';
214
+ return s;
215
+ }
216
+
192
217
  static VALUE error_map;
193
218
 
194
219
  VALUE frt_get_error(const char *err_type)
@@ -355,7 +380,7 @@ void Init_ferret_ext(void)
355
380
  cStateError =
356
381
  rb_define_class_under(mFerret, "StateError", rb_eStandardError);
357
382
  cFileNotFoundError =
358
- rb_define_class_under(rb_cObject, "FileNotFoundError", rb_eIOError);
383
+ rb_define_class_under(mFerret, "FileNotFoundError", rb_eIOError);
359
384
 
360
385
  error_map = rb_hash_new();
361
386
  rb_hash_aset(error_map, rb_intern("Exception"), rb_eStandardError);
data/ext/ferret.h CHANGED
@@ -65,6 +65,7 @@ extern VALUE frt_hs_to_rb_ary(HashSet *hs);
65
65
  extern void *frt_rb_data_ptr(VALUE val);
66
66
  extern char * frt_field(VALUE rfield);
67
67
  extern VALUE frt_get_term(const char *field, const char *term);
68
+ extern __inline char *json_concat_string(char *s, char *field);
68
69
  extern char *rs2s(VALUE rstr);
69
70
  extern char *nstrdup(VALUE rstr);
70
71
  #define Frt_Make_Struct(klass)\
data/ext/index.c CHANGED
@@ -6,7 +6,6 @@
6
6
  #include <string.h>
7
7
  #include <limits.h>
8
8
  #include <ctype.h>
9
- #include <unistd.h>
10
9
 
11
10
  #define GET_LOCK(lock, name, store, err_msg) do {\
12
11
  lock = store->open_lock(store, name);\
@@ -117,10 +116,10 @@ char *fn_for_generation(char *buf, char *base, char *ext, f_i64 gen)
117
116
  }
118
117
  }
119
118
 
120
- char *segfn_for_generation(char *buf, int generation)
119
+ char *segfn_for_generation(char *buf, f_u64 generation)
121
120
  {
122
121
  char b[SEGMENT_NAME_MAX_LENGTH];
123
- char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)generation);
122
+ char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, generation);
124
123
  sprintf(buf, SEGMENTS_FILE_NAME"_%s", u);
125
124
  return buf;
126
125
  }
@@ -393,25 +392,36 @@ FieldInfo *fis_by_number(FieldInfos *fis, int num)
393
392
 
394
393
  FieldInfos *fis_read(InStream *is)
395
394
  {
396
- int store_val, index_val, term_vector_val;
397
- int i;
398
- union { f_u32 i; float f; } tmp;
399
- FieldInfo *fi;
400
- FieldInfos *fis;
401
-
402
- store_val = is_read_vint(is);
403
- index_val = is_read_vint(is);
404
- term_vector_val = is_read_vint(is);
405
- fis = fis_new(store_val, index_val, term_vector_val);
406
- for (i = is_read_vint(is); i > 0; i--) {
407
- fi = ALLOC(FieldInfo);
408
- fi->name = is_read_string(is);
409
- tmp.i = is_read_u32(is);
410
- fi->boost = tmp.f;
411
- fi->bits = is_read_vint(is);
412
- fis_add_field(fis, fi);
413
- fi->ref_cnt = 1;
414
- }
395
+ FieldInfos *volatile fis;
396
+ TRY
397
+ do {
398
+ int store_val, index_val, term_vector_val;
399
+ int i;
400
+ union { f_u32 i; float f; } tmp;
401
+ FieldInfo *volatile fi;
402
+
403
+ store_val = is_read_vint(is);
404
+ index_val = is_read_vint(is);
405
+ term_vector_val = is_read_vint(is);
406
+ fis = fis_new(store_val, index_val, term_vector_val);
407
+ for (i = is_read_vint(is); i > 0; i--) {
408
+ fi = ALLOC_AND_ZERO(FieldInfo);
409
+ TRY
410
+ fi->name = is_read_string_safe(is);
411
+ tmp.i = is_read_u32(is);
412
+ fi->boost = tmp.f;
413
+ fi->bits = is_read_vint(is);
414
+ XCATCHALL
415
+ free(fi->name);
416
+ free(fi);
417
+ XENDTRY
418
+ fis_add_field(fis, fi);
419
+ fi->ref_cnt = 1;
420
+ }
421
+ } while (0);
422
+ XCATCHALL
423
+ fis_deref(fis);
424
+ XENDTRY
415
425
 
416
426
  return fis;
417
427
  }
@@ -557,21 +567,26 @@ SegmentInfo *si_new(char *name, int doc_cnt, Store *store)
557
567
 
558
568
  SegmentInfo *si_read(Store *store, InStream *is)
559
569
  {
560
- SegmentInfo *si = ALLOC_AND_ZERO(SegmentInfo);
561
- si->store = store;
562
- si->name = is_read_string(is);
563
- si->doc_cnt = is_read_vint(is);
564
- si->del_gen = is_read_vint(is);
565
- si->norm_gens_size = is_read_vint(is);
566
- si->ref_cnt = 1;
567
- if (0 < si->norm_gens_size) {
568
- int i;
569
- si->norm_gens = ALLOC_N(int, si->norm_gens_size);
570
- for (i = si->norm_gens_size - 1; i >= 0; i--) {
571
- si->norm_gens[i] = is_read_vint(is);
570
+ SegmentInfo *volatile si = ALLOC_AND_ZERO(SegmentInfo);
571
+ TRY
572
+ si->store = store;
573
+ si->name = is_read_string_safe(is);
574
+ si->doc_cnt = is_read_vint(is);
575
+ si->del_gen = is_read_vint(is);
576
+ si->norm_gens_size = is_read_vint(is);
577
+ si->ref_cnt = 1;
578
+ if (0 < si->norm_gens_size) {
579
+ int i;
580
+ si->norm_gens = ALLOC_N(int, si->norm_gens_size);
581
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
582
+ si->norm_gens[i] = is_read_vint(is);
583
+ }
572
584
  }
573
- }
574
- si->use_compound_file = (bool)is_read_byte(is);
585
+ si->use_compound_file = (bool)is_read_byte(is);
586
+ XCATCHALL
587
+ free(si->name);
588
+ free(si);
589
+ XENDTRY
575
590
  return si;
576
591
  }
577
592
 
@@ -4008,17 +4023,23 @@ static void bv_write(BitVector *bv, Store *store, char *name)
4008
4023
  static BitVector *bv_read(Store *store, char *name)
4009
4024
  {
4010
4025
  int i;
4011
- BitVector *bv = ALLOC_AND_ZERO(BitVector);
4012
- InStream *is = store->open_input(store, name);
4026
+ bool success = false;
4027
+ InStream *volatile is = store->open_input(store, name);
4028
+ BitVector *volatile bv = ALLOC_AND_ZERO(BitVector);
4013
4029
  bv->size = (int)is_read_vint(is);
4014
4030
  bv->capa = (bv->size >> 5) + 1;
4015
4031
  bv->bits = ALLOC_AND_ZERO_N(f_u32, bv->capa);
4016
4032
  bv->ref_cnt = 1;
4017
- for (i = (bv->size >> 5); i >= 0; i--) {
4018
- bv->bits[i] = is_read_u32(is);
4019
- }
4020
- is_close(is);
4021
- bv_recount(bv);
4033
+ TRY
4034
+ for (i = (bv->size >> 5); i >= 0; i--) {
4035
+ bv->bits[i] = is_read_u32(is);
4036
+ }
4037
+ bv_recount(bv);
4038
+ success = true;
4039
+ XFINALLY
4040
+ is_close(is);
4041
+ if (!success && bv) bv_destroy(bv);
4042
+ XENDTRY
4022
4043
  return bv;
4023
4044
  }
4024
4045
 
@@ -4297,6 +4318,7 @@ static IndexReader *sr_setup_i(SegmentReader *sr)
4297
4318
  sr->fr_bucket = ary_new();
4298
4319
  }
4299
4320
  XCATCHALL
4321
+ ir->sis = NULL;
4300
4322
  ir_close(ir);
4301
4323
  XENDTRY
4302
4324
 
@@ -4306,10 +4328,9 @@ static IndexReader *sr_setup_i(SegmentReader *sr)
4306
4328
  static IndexReader *sr_open(SegmentInfos *sis, FieldInfos *fis, int si_num,
4307
4329
  bool is_owner)
4308
4330
  {
4309
- IndexReader *ir;
4310
4331
  SegmentReader *sr = ALLOC_AND_ZERO(SegmentReader);
4311
4332
  sr->si = sis->segs[si_num];
4312
- ir = ir_setup(IR(sr), sr->si->store, sis, fis, is_owner);
4333
+ ir_setup(IR(sr), sr->si->store, sis, fis, is_owner);
4313
4334
  return sr_setup_i(sr);
4314
4335
  }
4315
4336
 
@@ -5187,10 +5208,10 @@ HashTable *dw_invert_field(DocWriter *dw,
5187
5208
  if (fld_inv->is_tokenized) {
5188
5209
  Token *tk;
5189
5210
  int pos = -1, num_terms = 0;
5190
- TokenStream *ts = a_get_ts(a, df->name, "");
5191
5211
 
5192
5212
  for (i = 0; i < df_size; i++) {
5193
- ts->reset(ts, df->data[i]);
5213
+ TokenStream *ts = a_get_ts(a, df->name, df->data[i]);
5214
+ /* ts->reset(ts, df->data[i]); no longer being called */
5194
5215
  if (store_offsets) {
5195
5216
  while (NULL != (tk = ts->next(ts))) {
5196
5217
  pos += tk->pos_inc;
@@ -5212,8 +5233,8 @@ HashTable *dw_invert_field(DocWriter *dw,
5212
5233
  }
5213
5234
  }
5214
5235
  }
5236
+ ts_deref(ts);
5215
5237
  }
5216
- ts_deref(ts);
5217
5238
  fld_inv->length = num_terms;
5218
5239
  }
5219
5240
  else {
data/ext/q_boolean.c CHANGED
@@ -1555,18 +1555,25 @@ Query *bq_new(bool coord_disabled)
1555
1555
  return self;
1556
1556
  }
1557
1557
 
1558
+ Query *bq_new_max(bool coord_disabled, int max)
1559
+ {
1560
+ Query *q = bq_new(coord_disabled);
1561
+ BQ(q)->max_clause_cnt = max;
1562
+ return q;
1563
+ }
1564
+
1558
1565
  BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc)
1559
1566
  {
1560
- if (BQ(self)->clause_cnt >= BQ(self)->clause_capa) {
1561
- BQ(self)->clause_capa *= 2;
1562
- REALLOC_N(BQ(self)->clauses, BooleanClause *, BQ(self)->clause_capa);
1563
- }
1564
- if (BQ(self)->clause_cnt > BQ(self)->max_clause_cnt) {
1567
+ if (BQ(self)->clause_cnt >= BQ(self)->max_clause_cnt) {
1565
1568
  RAISE(STATE_ERROR, "Two many clauses. The max clause limit is set to "
1566
1569
  "<%d> but your query has <%d> clauses. You can try increasing "
1567
1570
  ":max_clause_count for the BooleanQuery or using a different "
1568
1571
  "type of query.", BQ(self)->clause_cnt, BQ(self)->max_clause_cnt);
1569
1572
  }
1573
+ if (BQ(self)->clause_cnt >= BQ(self)->clause_capa) {
1574
+ BQ(self)->clause_capa *= 2;
1575
+ REALLOC_N(BQ(self)->clauses, BooleanClause *, BQ(self)->clause_capa);
1576
+ }
1570
1577
  BQ(self)->clauses[BQ(self)->clause_cnt] = bc;
1571
1578
  BQ(self)->clause_cnt++;
1572
1579
  return bc;
@@ -1580,9 +1587,16 @@ BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
1580
1587
 
1581
1588
  BooleanClause *bq_add_query_nr(Query *self, Query *sub_query, enum BC_TYPE occur)
1582
1589
  {
1583
- BooleanClause *bc = bc_new(sub_query, occur);
1590
+ BooleanClause *bc;
1591
+ if (BQ(self)->clause_cnt >= BQ(self)->max_clause_cnt) {
1592
+ RAISE(STATE_ERROR, "Too many clauses. The max clause limit is set to "
1593
+ "<%d> but your query has <%d> clauses. You can try increasing "
1594
+ ":max_clause_count for the BooleanQuery or using a different "
1595
+ "type of query.", BQ(self)->max_clause_cnt, BQ(self)->clause_cnt); /* argument order follows the message: limit first, then the query's clause count */
1596
+ }
1597
+ bc = bc_new(sub_query, occur);
1584
1598
  bq_add_clause(self, bc);
1585
- bc_deref(bc); /* bc would have been referenced unnecessarily */
1599
+ bc_deref(bc); /* bc was referenced unnecessarily */
1586
1600
  return bc;
1587
1601
  }
1588
1602