ferret 0.10.9 → 0.10.10
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/index.c +80 -89
- data/ext/index.h +1 -0
- data/ext/q_fuzzy.c +1 -1
- data/ext/r_index.c +50 -1
- data/ext/r_search.c +25 -7
- data/ext/search.c +7 -6
- data/ext/sort.c +4 -4
- data/lib/ferret/index.rb +15 -8
- data/lib/ferret_version.rb +1 -1
- data/test/threading/number_to_spoken.rb +132 -0
- data/test/threading/thread_safety_index_test.rb +78 -0
- data/test/threading/thread_safety_test.rb +137 -0
- data/test/unit/index/tc_index.rb +8 -0
- data/test/unit/search/tc_search_and_sort.rb +3 -3
- metadata +5 -2
data/ext/index.c
CHANGED
@@ -1552,7 +1552,7 @@ TermEnum *ste_new(InStream *is, SegmentFieldIndex *sfi)
|
|
1552
1552
|
|
1553
1553
|
typedef struct TermEnumWrapper
|
1554
1554
|
{
|
1555
|
-
int
|
1555
|
+
int index;
|
1556
1556
|
TermEnum *te;
|
1557
1557
|
int *doc_map;
|
1558
1558
|
IndexReader *ir;
|
@@ -1567,13 +1567,16 @@ typedef struct MultiTermEnum
|
|
1567
1567
|
TermEnumWrapper *tews;
|
1568
1568
|
int size;
|
1569
1569
|
int **field_num_map;
|
1570
|
+
int ti_cnt;
|
1571
|
+
TermInfo *tis;
|
1572
|
+
int *ti_indexes;
|
1570
1573
|
} MultiTermEnum;
|
1571
1574
|
|
1572
1575
|
static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2)
|
1573
1576
|
{
|
1574
1577
|
int cmpres = strcmp(tew1->term, tew2->term);
|
1575
1578
|
if (cmpres == 0) {
|
1576
|
-
return tew1->
|
1579
|
+
return tew1->index < tew2->index;
|
1577
1580
|
}
|
1578
1581
|
else {
|
1579
1582
|
return cmpres < 0;
|
@@ -1617,10 +1620,10 @@ static void tew_destroy(TermEnumWrapper *tew)
|
|
1617
1620
|
tew->te->close(tew->te);
|
1618
1621
|
}
|
1619
1622
|
|
1620
|
-
TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int
|
1623
|
+
TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int index, TermEnum *te,
|
1621
1624
|
IndexReader *ir)
|
1622
1625
|
{
|
1623
|
-
tew->
|
1626
|
+
tew->index = index;
|
1624
1627
|
tew->ir = ir;
|
1625
1628
|
tew->te = te;
|
1626
1629
|
tew->term = te->curr_term;
|
@@ -1646,9 +1649,12 @@ static char *mte_next(TermEnum *te)
|
|
1646
1649
|
|
1647
1650
|
te->curr_ti.doc_freq = 0;
|
1648
1651
|
|
1652
|
+
MTE(te)->ti_cnt = 0;
|
1649
1653
|
while ((top != NULL) && (strcmp(te->curr_term, top->term) == 0)) {
|
1650
1654
|
pq_pop(MTE(te)->tew_queue);
|
1651
1655
|
te->curr_ti.doc_freq += top->te->curr_ti.doc_freq;/* increment freq */
|
1656
|
+
MTE(te)->ti_indexes[MTE(te)->ti_cnt] = top->index;
|
1657
|
+
MTE(te)->tis[MTE(te)->ti_cnt++] = top->te->curr_ti;
|
1652
1658
|
if (tew_next(top)) {
|
1653
1659
|
pq_push(MTE(te)->tew_queue, top); /* restore queue */
|
1654
1660
|
}
|
@@ -1711,6 +1717,8 @@ static void mte_close(TermEnum *te)
|
|
1711
1717
|
tew_destroy(&(MTE(te)->tews[i]));
|
1712
1718
|
}
|
1713
1719
|
free(MTE(te)->tews);
|
1720
|
+
free(MTE(te)->tis);
|
1721
|
+
free(MTE(te)->ti_indexes);
|
1714
1722
|
pq_destroy(MTE(te)->tew_queue);
|
1715
1723
|
free(te);
|
1716
1724
|
}
|
@@ -1718,7 +1726,6 @@ static void mte_close(TermEnum *te)
|
|
1718
1726
|
TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
1719
1727
|
{
|
1720
1728
|
IndexReader **readers = mr->sub_readers;
|
1721
|
-
int *starts = mr->starts;
|
1722
1729
|
int r_cnt = mr->r_cnt;
|
1723
1730
|
int i;
|
1724
1731
|
IndexReader *reader;
|
@@ -1731,6 +1738,8 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1731
1738
|
TE(mte)->close = &mte_close;
|
1732
1739
|
|
1733
1740
|
mte->size = r_cnt;
|
1741
|
+
mte->tis = ALLOC_AND_ZERO_N(TermInfo, r_cnt);
|
1742
|
+
mte->ti_indexes = ALLOC_AND_ZERO_N(int, r_cnt);
|
1734
1743
|
mte->tews = ALLOC_AND_ZERO_N(TermEnumWrapper, r_cnt);
|
1735
1744
|
mte->tew_queue = pq_new(r_cnt, (lt_ft)&tew_lt, (free_ft)NULL);
|
1736
1745
|
mte->field_num_map = mr->field_num_map;
|
@@ -1750,7 +1759,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1750
1759
|
sub_te = reader->terms(reader, fnum);
|
1751
1760
|
}
|
1752
1761
|
|
1753
|
-
tew = tew_setup(&(mte->tews[i]),
|
1762
|
+
tew = tew_setup(&(mte->tews[i]), i, sub_te, reader);
|
1754
1763
|
if (((term == NULL) && tew_next(tew))
|
1755
1764
|
|| (tew->term && (tew->term[0] != '\0'))) {
|
1756
1765
|
pq_push(mte->tew_queue, tew); /* initialize queue */
|
@@ -1759,7 +1768,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1759
1768
|
/* add the term_enum_wrapper just in case */
|
1760
1769
|
sub_te = reader->terms(reader, 0);
|
1761
1770
|
sub_te->field_num = -1;
|
1762
|
-
tew_setup(&(mte->tews[i]),
|
1771
|
+
tew_setup(&(mte->tews[i]), i, sub_te, reader);
|
1763
1772
|
}
|
1764
1773
|
}
|
1765
1774
|
|
@@ -2386,45 +2395,29 @@ typedef struct MultiTermDocEnum
|
|
2386
2395
|
{
|
2387
2396
|
TermDocEnum tde;
|
2388
2397
|
int *starts;
|
2389
|
-
char *term;
|
2390
|
-
int field_num;
|
2391
2398
|
int base;
|
2392
2399
|
int ptr;
|
2393
2400
|
int ir_cnt;
|
2394
|
-
|
2401
|
+
char *state;
|
2402
|
+
TermEnum *te;
|
2395
2403
|
IndexReader **irs;
|
2396
2404
|
TermDocEnum **irs_tde;
|
2397
2405
|
TermDocEnum *curr_tde;
|
2398
|
-
TermDocEnum *(*reader_tde_i)(IndexReader *ir);
|
2399
2406
|
} MultiTermDocEnum;
|
2400
2407
|
|
2401
|
-
static TermDocEnum *
|
2402
|
-
{
|
2403
|
-
return ir->term_docs(ir);
|
2404
|
-
}
|
2405
|
-
|
2406
|
-
static TermDocEnum *mtde_get_tde_i(MultiTermDocEnum *mtde, int i)
|
2408
|
+
static TermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
|
2407
2409
|
{
|
2408
|
-
|
2409
|
-
|
2410
|
+
mtde->ptr++;
|
2411
|
+
while (mtde->ptr < mtde->ir_cnt && !mtde->state[mtde->ptr]) {
|
2412
|
+
mtde->ptr++;
|
2413
|
+
}
|
2414
|
+
if (mtde->ptr >= mtde->ir_cnt) {
|
2415
|
+
return mtde->curr_tde = NULL;
|
2410
2416
|
}
|
2411
2417
|
else {
|
2412
|
-
|
2413
|
-
|
2414
|
-
|
2415
|
-
|
2416
|
-
if (fnum >= 0) {
|
2417
|
-
TermDocEnum *tde = mtde->irs_tde[i];
|
2418
|
-
if (tde == NULL) {
|
2419
|
-
tde = mtde->irs_tde[i] = mtde->reader_tde_i(mtde->irs[i]);
|
2420
|
-
}
|
2421
|
-
|
2422
|
-
tde->seek(tde, fnum, mtde->term);
|
2423
|
-
return tde;
|
2424
|
-
}
|
2425
|
-
else {
|
2426
|
-
return NULL;
|
2427
|
-
}
|
2418
|
+
TermDocEnum *tde = mtde->curr_tde = mtde->irs_tde[mtde->ptr];
|
2419
|
+
mtde->base = mtde->starts[mtde->ptr];
|
2420
|
+
return tde;
|
2428
2421
|
}
|
2429
2422
|
}
|
2430
2423
|
|
@@ -2435,30 +2428,35 @@ static TermDocEnum *mtde_get_tde_i(MultiTermDocEnum *mtde, int i)
|
|
2435
2428
|
}\
|
2436
2429
|
} while (0)
|
2437
2430
|
|
2438
|
-
static void
|
2431
|
+
static void mtde_seek_te(TermDocEnum *tde, TermEnum *te)
|
2439
2432
|
{
|
2433
|
+
int i;
|
2440
2434
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2441
|
-
|
2442
|
-
|
2435
|
+
memset(mtde->state, 0, mtde->ir_cnt);
|
2436
|
+
for (i = MTE(te)->ti_cnt - 1; i >= 0; i--) {
|
2437
|
+
int index = MTE(te)->ti_indexes[i];
|
2438
|
+
TermDocEnum *tde = mtde->irs_tde[index];
|
2439
|
+
mtde->state[index] = 1;
|
2440
|
+
if (tde->close == stde_close) {
|
2441
|
+
stde_seek_ti(STDE(tde), MTE(te)->tis + i);
|
2442
|
+
} else if (tde->close == stpe_close) {
|
2443
|
+
stpe_seek_ti(STDE(tde), MTE(te)->tis + i);
|
2444
|
+
} else {
|
2445
|
+
tde->seek(tde, MTE(te)->tews[index].te->field_num, te->curr_term);
|
2446
|
+
}
|
2443
2447
|
}
|
2444
|
-
mtde->term = estrdup(term);
|
2445
|
-
mtde->field_num = field_num;
|
2446
2448
|
mtde->base = 0;
|
2447
|
-
mtde->ptr =
|
2448
|
-
mtde
|
2449
|
+
mtde->ptr = -1;
|
2450
|
+
mtde_next_tde(mtde);
|
2449
2451
|
}
|
2450
2452
|
|
2451
|
-
static void
|
2453
|
+
static void mtde_seek(TermDocEnum *tde, int field_num, const char *term)
|
2452
2454
|
{
|
2453
2455
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2454
|
-
|
2455
|
-
|
2456
|
-
|
2457
|
-
|
2458
|
-
mtde->field_num = te->field_num;
|
2459
|
-
mtde->base = 0;
|
2460
|
-
mtde->ptr = 0;
|
2461
|
-
mtde->curr_tde = NULL;
|
2456
|
+
TermEnum *te = mtde->te;
|
2457
|
+
te->set_field(te, field_num);
|
2458
|
+
te->skip_to(te, term);
|
2459
|
+
mtde_seek_te(tde, te);
|
2462
2460
|
}
|
2463
2461
|
|
2464
2462
|
static int mtde_doc_num(TermDocEnum *tde)
|
@@ -2479,10 +2477,7 @@ static bool mtde_next(TermDocEnum *tde)
|
|
2479
2477
|
if (mtde->curr_tde != NULL && mtde->curr_tde->next(mtde->curr_tde)) {
|
2480
2478
|
return true;
|
2481
2479
|
}
|
2482
|
-
else if (mtde
|
2483
|
-
mtde->base = mtde->starts[mtde->ptr];
|
2484
|
-
mtde->curr_tde = mtde_get_tde_i(mtde, mtde->ptr);
|
2485
|
-
mtde->ptr++;
|
2480
|
+
else if (mtde_next_tde(mtde)) {
|
2486
2481
|
return mtde_next(tde);
|
2487
2482
|
}
|
2488
2483
|
else {
|
@@ -2495,19 +2490,11 @@ static int mtde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2495
2490
|
int i, end = 0, last_end = 0, b;
|
2496
2491
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2497
2492
|
while (true) {
|
2498
|
-
|
2499
|
-
if (mtde->ptr < mtde->ir_cnt) { /* try next segment */
|
2500
|
-
mtde->base = mtde->starts[mtde->ptr];
|
2501
|
-
mtde->curr_tde = mtde_get_tde_i(mtde, mtde->ptr++);
|
2502
|
-
}
|
2503
|
-
else {
|
2504
|
-
return end;
|
2505
|
-
}
|
2506
|
-
}
|
2493
|
+
if (mtde->curr_tde == NULL) return end;
|
2507
2494
|
end += mtde->curr_tde->read(mtde->curr_tde, docs + last_end,
|
2508
2495
|
freqs + last_end, req_num - last_end);
|
2509
2496
|
if (end == last_end) { /* none left in segment */
|
2510
|
-
mtde
|
2497
|
+
if (!mtde_next_tde(mtde)) return end;
|
2511
2498
|
}
|
2512
2499
|
else { /* got some */
|
2513
2500
|
b = mtde->base; /* adjust doc numbers */
|
@@ -2528,19 +2515,15 @@ static bool mtde_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2528
2515
|
{
|
2529
2516
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2530
2517
|
TermDocEnum *curr_tde;
|
2531
|
-
while (
|
2532
|
-
|
2533
|
-
if (curr_tde && (target_doc_num < mtde->starts[mtde->ptr]) &&
|
2518
|
+
while (NULL != (curr_tde = mtde->curr_tde)) {
|
2519
|
+
if (target_doc_num < mtde->starts[mtde->ptr + 1] &&
|
2534
2520
|
(curr_tde->skip_to(curr_tde, target_doc_num - mtde->base))) {
|
2535
2521
|
return true;
|
2536
2522
|
}
|
2537
2523
|
|
2538
|
-
mtde
|
2539
|
-
mtde->curr_tde = mtde_get_tde_i(mtde, mtde->ptr);
|
2540
|
-
mtde->ptr++;
|
2524
|
+
mtde_next_tde(mtde);
|
2541
2525
|
}
|
2542
2526
|
|
2543
|
-
curr_tde = mtde->curr_tde;
|
2544
2527
|
if (curr_tde) {
|
2545
2528
|
return curr_tde->skip_to(curr_tde, target_doc_num - mtde->base);
|
2546
2529
|
}
|
@@ -2554,20 +2537,18 @@ static void mtde_close(TermDocEnum *tde)
|
|
2554
2537
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2555
2538
|
TermDocEnum *tmp_tde;
|
2556
2539
|
int i = mtde->ir_cnt;
|
2540
|
+
mtde->te->close(mtde->te);
|
2557
2541
|
while (i > 0) {
|
2558
2542
|
i--;
|
2559
|
-
|
2560
|
-
|
2561
|
-
}
|
2562
|
-
}
|
2563
|
-
if (mtde->term != NULL) {
|
2564
|
-
free(mtde->term);
|
2543
|
+
tmp_tde = mtde->irs_tde[i];
|
2544
|
+
tmp_tde->close(tmp_tde);
|
2565
2545
|
}
|
2566
2546
|
free(mtde->irs_tde);
|
2547
|
+
free(mtde->state);
|
2567
2548
|
free(tde);
|
2568
2549
|
}
|
2569
2550
|
|
2570
|
-
TermDocEnum *
|
2551
|
+
TermDocEnum *mtxe_new(MultiReader *mr)
|
2571
2552
|
{
|
2572
2553
|
MultiTermDocEnum *mtde = ALLOC_AND_ZERO(MultiTermDocEnum);
|
2573
2554
|
TermDocEnum *tde = TDE(mtde);
|
@@ -2578,28 +2559,34 @@ TermDocEnum *mtde_new(MultiReader *mr)
|
|
2578
2559
|
tde->next = &mtde_next;
|
2579
2560
|
tde->read = &mtde_read;
|
2580
2561
|
tde->skip_to = &mtde_skip_to;
|
2581
|
-
tde->next_position = NULL;
|
2582
2562
|
tde->close = &mtde_close;
|
2583
2563
|
|
2564
|
+
mtde->state = ALLOC_AND_ZERO_N(char, mr->r_cnt);
|
2565
|
+
mtde->te = ((IndexReader *)mr)->terms((IndexReader *)mr, 0);
|
2584
2566
|
mtde->starts = mr->starts;
|
2585
2567
|
mtde->ir_cnt = mr->r_cnt;
|
2586
2568
|
mtde->irs = mr->sub_readers;
|
2587
|
-
mtde->field_num_map = mr->field_num_map;
|
2588
2569
|
mtde->irs_tde = ALLOC_AND_ZERO_N(TermDocEnum *, mr->r_cnt);
|
2589
|
-
mtde->reader_tde_i = &mtde_reader_tde_i;
|
2590
2570
|
|
2591
2571
|
return tde;
|
2592
2572
|
}
|
2593
2573
|
|
2574
|
+
TermDocEnum *mtde_new(MultiReader *mr)
|
2575
|
+
{
|
2576
|
+
int i;
|
2577
|
+
TermDocEnum *tde = mtxe_new(mr);
|
2578
|
+
tde->next_position = NULL;
|
2579
|
+
for (i = mr->r_cnt - 1; i >= 0; i--) {
|
2580
|
+
IndexReader *ir = mr->sub_readers[i];
|
2581
|
+
MTDE(tde)->irs_tde[i] = ir->term_docs(ir);
|
2582
|
+
}
|
2583
|
+
return tde;
|
2584
|
+
}
|
2585
|
+
|
2594
2586
|
/****************************************************************************
|
2595
2587
|
* MultiTermPosEnum
|
2596
2588
|
****************************************************************************/
|
2597
2589
|
|
2598
|
-
TermDocEnum *mtpe_reader_tde_i(IndexReader *ir)
|
2599
|
-
{
|
2600
|
-
return ir->term_positions(ir);
|
2601
|
-
}
|
2602
|
-
|
2603
2590
|
int mtpe_next_position(TermDocEnum *tde)
|
2604
2591
|
{
|
2605
2592
|
CHECK_CURR_TDE("next_position");
|
@@ -2608,9 +2595,13 @@ int mtpe_next_position(TermDocEnum *tde)
|
|
2608
2595
|
|
2609
2596
|
TermDocEnum *mtpe_new(MultiReader *mr)
|
2610
2597
|
{
|
2611
|
-
|
2598
|
+
int i;
|
2599
|
+
TermDocEnum *tde = mtxe_new(mr);
|
2612
2600
|
tde->next_position = &mtpe_next_position;
|
2613
|
-
|
2601
|
+
for (i = mr->r_cnt - 1; i >= 0; i--) {
|
2602
|
+
IndexReader *ir = mr->sub_readers[i];
|
2603
|
+
MTDE(tde)->irs_tde[i] = ir->term_positions(ir);
|
2604
|
+
}
|
2614
2605
|
return tde;
|
2615
2606
|
}
|
2616
2607
|
|
data/ext/index.h
CHANGED
@@ -378,6 +378,7 @@ struct TermDocEnum
|
|
378
378
|
{
|
379
379
|
void (*seek)(TermDocEnum *tde, int field_num, const char *term);
|
380
380
|
void (*seek_te)(TermDocEnum *tde, TermEnum *te);
|
381
|
+
void (*seek_ti)(TermDocEnum *tde, TermInfo *ti);
|
381
382
|
int (*doc_num)(TermDocEnum *tde);
|
382
383
|
int (*freq)(TermDocEnum *tde);
|
383
384
|
bool (*next)(TermDocEnum *tde);
|
data/ext/q_fuzzy.c
CHANGED
data/ext/r_index.c
CHANGED
@@ -564,6 +564,19 @@ frt_fis_to_s(VALUE self)
|
|
564
564
|
free(fis_s);
|
565
565
|
return rfis_s;
|
566
566
|
}
|
567
|
+
|
568
|
+
/*
|
569
|
+
* call-seq:
|
570
|
+
* fis.size -> int
|
571
|
+
*
|
572
|
+
* Return the number of fields in the FieldInfos object.
|
573
|
+
*/
|
574
|
+
static VALUE
|
575
|
+
frt_fis_size(VALUE self)
|
576
|
+
{
|
577
|
+
FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
|
578
|
+
return INT2FIX(fis->size);
|
579
|
+
}
|
567
580
|
|
568
581
|
/*
|
569
582
|
* call-seq:
|
@@ -2225,7 +2238,7 @@ frt_ir_get_doc(int argc, VALUE *argv, VALUE self)
|
|
2225
2238
|
pos = (pos < 0) ? (max + pos) : pos;
|
2226
2239
|
if (pos < 0 || pos >= max) {
|
2227
2240
|
rb_raise(rb_eArgError, ":%d is out of range [%d..%d] for "
|
2228
|
-
"
|
2241
|
+
"IndexReader#[]", pos, 0, max,
|
2229
2242
|
rb_id2name(SYM2ID(argv)));
|
2230
2243
|
}
|
2231
2244
|
return frt_get_lazy_doc(ir->get_lazy_doc(ir, pos));
|
@@ -2425,6 +2438,25 @@ frt_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
|
|
2425
2438
|
StringValuePtr(rterm)));
|
2426
2439
|
}
|
2427
2440
|
|
2441
|
+
/*
|
2442
|
+
* call-seq:
|
2443
|
+
* index_reader.term_count(field) -> int
|
2444
|
+
*
|
2445
|
+
* Returns a count of the number of terms in the field
|
2446
|
+
*/
|
2447
|
+
static VALUE
|
2448
|
+
frt_ir_term_count(VALUE self, VALUE rfield)
|
2449
|
+
{
|
2450
|
+
IndexReader *ir = (IndexReader *)DATA_PTR(self);
|
2451
|
+
TermEnum *te = ir_terms(ir, frt_field(rfield));
|
2452
|
+
int count = 0;
|
2453
|
+
while (te->next(te)) {
|
2454
|
+
count++;
|
2455
|
+
}
|
2456
|
+
te->close(te);
|
2457
|
+
return INT2FIX(count);
|
2458
|
+
}
|
2459
|
+
|
2428
2460
|
/*
|
2429
2461
|
* call-seq:
|
2430
2462
|
* index_reader.fields -> array of field-names
|
@@ -2483,6 +2515,19 @@ frt_ir_tk_fields(VALUE self)
|
|
2483
2515
|
return rfield_names;
|
2484
2516
|
}
|
2485
2517
|
|
2518
|
+
/*
|
2519
|
+
* call-seq:
|
2520
|
+
* index_reader.version -> int
|
2521
|
+
*
|
2522
|
+
* Returns the current version of the index reader.
|
2523
|
+
*/
|
2524
|
+
static VALUE
|
2525
|
+
frt_ir_version(VALUE self)
|
2526
|
+
{
|
2527
|
+
IndexReader *ir = (IndexReader *)DATA_PTR(self);
|
2528
|
+
return INT2FIX(ir->sis->version);
|
2529
|
+
}
|
2530
|
+
|
2486
2531
|
/****************************************************************************
|
2487
2532
|
*
|
2488
2533
|
* Init Functions
|
@@ -2708,6 +2753,7 @@ Init_FieldInfos(void)
|
|
2708
2753
|
rb_define_method(cFieldInfos, "add_field", frt_fis_add_field, -1);
|
2709
2754
|
rb_define_method(cFieldInfos, "each", frt_fis_each, 0);
|
2710
2755
|
rb_define_method(cFieldInfos, "to_s", frt_fis_to_s, 0);
|
2756
|
+
rb_define_method(cFieldInfos, "size", frt_fis_size, 0);
|
2711
2757
|
rb_define_method(cFieldInfos, "create_index",
|
2712
2758
|
frt_fis_create_index, 1);
|
2713
2759
|
rb_define_method(cFieldInfos, "fields", frt_fis_get_fields, 0);
|
@@ -3188,6 +3234,7 @@ Init_IndexReader(void)
|
|
3188
3234
|
{
|
3189
3235
|
cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
|
3190
3236
|
rb_define_alloc_func(cIndexReader, frt_data_alloc);
|
3237
|
+
/*rb_define_singleton_method(cIndexReader, "version", frt_class_ir_version, 0); */
|
3191
3238
|
rb_define_method(cIndexReader, "initialize", frt_ir_init, 1);
|
3192
3239
|
rb_define_method(cIndexReader, "set_norm", frt_ir_set_norm, 3);
|
3193
3240
|
rb_define_method(cIndexReader, "norms", frt_ir_norms, 1);
|
@@ -3212,10 +3259,12 @@ Init_IndexReader(void)
|
|
3212
3259
|
rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 2);
|
3213
3260
|
rb_define_method(cIndexReader, "terms", frt_ir_terms, 1);
|
3214
3261
|
rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 2);
|
3262
|
+
rb_define_method(cIndexReader, "term_count", frt_ir_term_count, 1);
|
3215
3263
|
rb_define_method(cIndexReader, "fields", frt_ir_fields, 0);
|
3216
3264
|
rb_define_method(cIndexReader, "field_names", frt_ir_fields, 0);
|
3217
3265
|
rb_define_method(cIndexReader, "field_infos", frt_ir_field_infos, 0);
|
3218
3266
|
rb_define_method(cIndexReader, "tokenized_fields", frt_ir_tk_fields, 0);
|
3267
|
+
rb_define_method(cIndexReader, "version", frt_ir_version, 0);
|
3219
3268
|
}
|
3220
3269
|
|
3221
3270
|
/* rdoc hack
|
data/ext/r_search.c
CHANGED
@@ -104,6 +104,7 @@ static ID id_score;
|
|
104
104
|
static ID id_hits;
|
105
105
|
static ID id_total_hits;
|
106
106
|
static ID id_max_score;
|
107
|
+
static ID id_searcher;
|
107
108
|
|
108
109
|
/* Search */
|
109
110
|
static VALUE sym_offset;
|
@@ -152,7 +153,7 @@ frt_get_hit(Hit *hit)
|
|
152
153
|
****************************************************************************/
|
153
154
|
|
154
155
|
static VALUE
|
155
|
-
frt_get_td(TopDocs *td)
|
156
|
+
frt_get_td(TopDocs *td, VALUE rsearcher)
|
156
157
|
{
|
157
158
|
int i;
|
158
159
|
VALUE rtop_docs;
|
@@ -167,6 +168,7 @@ frt_get_td(TopDocs *td)
|
|
167
168
|
INT2FIX(td->total_hits),
|
168
169
|
hit_ary,
|
169
170
|
rb_float_new((double)td->max_score),
|
171
|
+
rsearcher,
|
170
172
|
NULL);
|
171
173
|
td_destroy(td);
|
172
174
|
return rtop_docs;
|
@@ -174,20 +176,26 @@ frt_get_td(TopDocs *td)
|
|
174
176
|
|
175
177
|
/*
|
176
178
|
* call-seq:
|
177
|
-
* top_doc.to_s -> string
|
179
|
+
* top_doc.to_s(field = :id) -> string
|
178
180
|
*
|
179
181
|
* Returns a string representation of the top_doc in readable format.
|
180
182
|
*/
|
181
183
|
static VALUE
|
182
|
-
frt_td_to_s(VALUE self)
|
184
|
+
frt_td_to_s(int argc, VALUE *argv, VALUE self)
|
183
185
|
{
|
184
186
|
int i;
|
185
187
|
VALUE rhits = rb_funcall(self, id_hits, 0);
|
188
|
+
Searcher *sea = (Searcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
|
186
189
|
const int len = RARRAY(rhits)->len;
|
187
190
|
char *str = ALLOC_N(char, len * 64 + 100);
|
188
191
|
char *s = str;
|
192
|
+
char *field = "id";
|
189
193
|
VALUE rstr;
|
190
194
|
|
195
|
+
if (argc) {
|
196
|
+
field = frt_field(argv[0]);
|
197
|
+
}
|
198
|
+
|
191
199
|
sprintf(s, "TopDocs: total_hits = %d, max_score = %f [\n",
|
192
200
|
FIX2INT(rb_funcall(self, id_total_hits, 0)),
|
193
201
|
NUM2DBL(rb_funcall(self, id_max_score, 0)));
|
@@ -195,10 +203,18 @@ frt_td_to_s(VALUE self)
|
|
195
203
|
|
196
204
|
for (i = 0; i < len; i++) {
|
197
205
|
VALUE rhit = RARRAY(rhits)->ptr[i];
|
198
|
-
|
199
|
-
|
206
|
+
int doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
|
207
|
+
char *value = "";
|
208
|
+
LazyDoc *lzd = sea->get_lazy_doc(sea, doc_id);
|
209
|
+
LazyDocField *lzdf = h_get(lzd->field_dict, field);
|
210
|
+
if (NULL != lzdf) {
|
211
|
+
value = lazy_df_get_data(lzdf, 0);
|
212
|
+
}
|
213
|
+
|
214
|
+
sprintf(s, "\t%d \"%s\": %f\n", doc_id, value,
|
200
215
|
NUM2DBL(rb_funcall(rhit, id_score, 0)));
|
201
216
|
s += strlen(s);
|
217
|
+
lazy_doc_close(lzd);
|
202
218
|
}
|
203
219
|
|
204
220
|
sprintf(s, "]\n");
|
@@ -2388,7 +2404,7 @@ frt_sea_search(int argc, VALUE *argv, VALUE self)
|
|
2388
2404
|
Query *query;
|
2389
2405
|
rb_scan_args(argc, argv, "11", &rquery, &roptions);
|
2390
2406
|
Data_Get_Struct(rquery, Query, query);
|
2391
|
-
return frt_get_td(frt_sea_search_internal(query, roptions, sea));
|
2407
|
+
return frt_get_td(frt_sea_search_internal(query, roptions, sea), self);
|
2392
2408
|
}
|
2393
2409
|
|
2394
2410
|
/*
|
@@ -2760,13 +2776,15 @@ Init_TopDocs(void)
|
|
2760
2776
|
"total_hits",
|
2761
2777
|
"hits",
|
2762
2778
|
"max_score",
|
2779
|
+
"searcher",
|
2763
2780
|
NULL);
|
2764
2781
|
rb_set_class_path(cTopDocs, mSearch, td_class);
|
2765
2782
|
rb_const_set(mSearch, rb_intern(td_class), cTopDocs);
|
2766
|
-
rb_define_method(cTopDocs, "to_s", frt_td_to_s,
|
2783
|
+
rb_define_method(cTopDocs, "to_s", frt_td_to_s, -1);
|
2767
2784
|
id_hits = rb_intern("hits");
|
2768
2785
|
id_total_hits = rb_intern("total_hits");
|
2769
2786
|
id_max_score = rb_intern("max_score");
|
2787
|
+
id_searcher = rb_intern("searcher");
|
2770
2788
|
}
|
2771
2789
|
|
2772
2790
|
/*
|
data/ext/search.c
CHANGED
@@ -122,11 +122,12 @@ static void hit_pq_down(PriorityQueue *pq)
|
|
122
122
|
static Hit *hit_pq_pop(PriorityQueue *pq)
|
123
123
|
{
|
124
124
|
if (pq->size > 0) {
|
125
|
-
Hit
|
126
|
-
|
127
|
-
|
125
|
+
Hit **heap = (Hit **)pq->heap;
|
126
|
+
Hit *result = heap[1]; /* save first value */
|
127
|
+
heap[1] = heap[pq->size]; /* move last to first */
|
128
|
+
heap[pq->size] = NULL;
|
128
129
|
pq->size--;
|
129
|
-
hit_pq_down(pq);
|
130
|
+
hit_pq_down(pq); /* adjust heap */
|
130
131
|
return result;
|
131
132
|
}
|
132
133
|
else {
|
@@ -1079,8 +1080,8 @@ static TopDocs *isea_search_w(Searcher *self,
|
|
1079
1080
|
for (i = num_docs - 1; i >= 0; i--) {
|
1080
1081
|
score_docs[i] = hq_pop(hq);
|
1081
1082
|
/*
|
1082
|
-
|
1083
|
-
|
1083
|
+
printf("score_docs[i][%d] = [%ld] => %d-->%f\n", i,
|
1084
|
+
score_docs[i], score_docs[i]->doc, score_docs[i]->score);
|
1084
1085
|
*/
|
1085
1086
|
}
|
1086
1087
|
}
|
data/ext/sort.c
CHANGED
@@ -426,8 +426,8 @@ int sf_string_compare(void *index, Hit *hit1, Hit *hit2)
|
|
426
426
|
char *s2 = ((StringIndex *)index)->values[
|
427
427
|
((StringIndex *)index)->index[hit2->doc]];
|
428
428
|
|
429
|
-
if (s1 == NULL) return
|
430
|
-
if (s2 == NULL) return 1;
|
429
|
+
if (s1 == NULL) return s2 ? 1 : 0;
|
430
|
+
if (s2 == NULL) return -1;
|
431
431
|
|
432
432
|
#ifdef POSH_OS_WIN32
|
433
433
|
return strcmp(s1, s2);
|
@@ -874,8 +874,8 @@ bool fdshq_lt(FieldDoc *fd1, FieldDoc *fd2)
|
|
874
874
|
do {
|
875
875
|
char *s1 = cmps1[i].val.s;
|
876
876
|
char *s2 = cmps2[i].val.s;
|
877
|
-
if (s1 == NULL) c = s2 ?
|
878
|
-
else if (s2 == NULL) c = 1;
|
877
|
+
if (s1 == NULL) c = s2 ? 1 : 0;
|
878
|
+
else if (s2 == NULL) c = -1;
|
879
879
|
#ifdef POSH_OS_WIN32
|
880
880
|
else c = strcmp(s1, s2);
|
881
881
|
#else
|
data/lib/ferret/index.rb
CHANGED
@@ -179,11 +179,13 @@ module Ferret::Index
|
|
179
179
|
# Alternatively you may want to use the HTML entity
|
180
180
|
# … or the UTF-8 string "\342\200\246".
|
181
181
|
def highlight(query, doc_id, options = {})
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
182
|
+
@dir.synchronize do
|
183
|
+
ensure_searcher_open()
|
184
|
+
@searcher.highlight(do_process_query(query),
|
185
|
+
doc_id,
|
186
|
+
options[:field]||@options[:default_field],
|
187
|
+
options)
|
188
|
+
end
|
187
189
|
end
|
188
190
|
|
189
191
|
# Closes this index by closing its associated reader and writer objects.
|
@@ -273,9 +275,14 @@ module Ferret::Index
|
|
273
275
|
end
|
274
276
|
ensure_writer_open()
|
275
277
|
|
276
|
-
|
277
|
-
|
278
|
-
|
278
|
+
if analyzer
|
279
|
+
old_analyzer = @writer.analyzer
|
280
|
+
@writer.analyzer = analyzer
|
281
|
+
@writer.add_document(doc)
|
282
|
+
@writer.analyzer = old_analyzer
|
283
|
+
else
|
284
|
+
@writer.add_document(doc)
|
285
|
+
end
|
279
286
|
|
280
287
|
flush() if @auto_flush
|
281
288
|
end
|
data/lib/ferret_version.rb
CHANGED
@@ -0,0 +1,132 @@
|
|
1
|
+
# Author: Matthew D Moss
|
2
|
+
#
|
3
|
+
# Written for ruby quiz #25
|
4
|
+
#
|
5
|
+
class JapaneseTranslator
|
6
|
+
# My knowledge of counting Japanese is limited, so this may not
|
7
|
+
# be entirely correct; in particular, I don't know what rules
|
8
|
+
# to follow after 'hyaku man' (1,000,000).
|
9
|
+
# I also combine a digit with its group, such as 'gohyaku' rather
|
10
|
+
# than 'go hyaku'; I just like reading it better that way.
|
11
|
+
|
12
|
+
DIGITS = %w(zero ichi ni san yon go roku nana hachi kyu)
|
13
|
+
GROUPS = %w(nothingtoseeheremovealong ju hyaku sen)
|
14
|
+
MAN = 10000
|
15
|
+
|
16
|
+
def to_spoken(val)
|
17
|
+
case val <=> 0
|
18
|
+
when -1
|
19
|
+
'- ' + to_spoken(-val)
|
20
|
+
when 0
|
21
|
+
DIGITS[0]
|
22
|
+
else
|
23
|
+
group(val, 0)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
def group(val, level)
|
30
|
+
if val >= MAN
|
31
|
+
group(val / MAN, 0) + 'man ' + group(val % MAN, 0)
|
32
|
+
else
|
33
|
+
case val
|
34
|
+
when 0
|
35
|
+
''
|
36
|
+
when 1
|
37
|
+
level == 0 ? DIGITS[val] : GROUPS[level]
|
38
|
+
when 2...10
|
39
|
+
DIGITS[val] + (GROUPS[level] if level > 0).to_s
|
40
|
+
else
|
41
|
+
group(val / 10, level+1) + ' ' + group(val % 10, level)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
class USEnglishTranslator
|
49
|
+
# Formal, US English. Optional 'and'. Will not produce things
|
50
|
+
# such as 'twelve hundred' but rather 'one thousand two hundred'.
|
51
|
+
# The use of 'and' is incomplete; it is sometimes missed.
|
52
|
+
|
53
|
+
DIGITS = %w(zero one two three four five six seven eight nine)
|
54
|
+
TEENS = %w(ten eleven twelve thirteen fourteen fifteen sixteen
|
55
|
+
seventeen eighteen nineteen)
|
56
|
+
TENS = %w(hello world twenty thirty forty fifty sixty seventy
|
57
|
+
eighty ninety)
|
58
|
+
GROUPS = %w(thousand million billion trillion quadrillion
|
59
|
+
quintillion sextillion septillion octillion nonillion
|
60
|
+
decillion)
|
61
|
+
K = 1000
|
62
|
+
|
63
|
+
def initialize(conjunction = true)
|
64
|
+
@conjunction = conjunction
|
65
|
+
end
|
66
|
+
|
67
|
+
def to_spoken(val)
|
68
|
+
case val <=> 0
|
69
|
+
when -1
|
70
|
+
'negative ' + to_spoken(-val)
|
71
|
+
when 0
|
72
|
+
DIGITS[0]
|
73
|
+
else
|
74
|
+
group(val, 0).flatten.join(' ')
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
def group(val, level)
|
81
|
+
x = group(val / K, level + 1) << GROUPS[level] if val >= K
|
82
|
+
x.to_a << under_1000(val % K, level)
|
83
|
+
end
|
84
|
+
|
85
|
+
def under_1000(val, level)
|
86
|
+
x = [DIGITS[val / 100]] << 'hundred' if val >= 100
|
87
|
+
x.to_a << under_100(val % 100, (level == 0 and not x.nil?))
|
88
|
+
end
|
89
|
+
|
90
|
+
def under_100(val, junction)
|
91
|
+
x = [('and' if @conjunction and junction)] # wyf?
|
92
|
+
case val
|
93
|
+
when 0
|
94
|
+
[]
|
95
|
+
when 1...10
|
96
|
+
x << DIGITS[val]
|
97
|
+
when 10...20
|
98
|
+
x << TEENS[val - 10]
|
99
|
+
else
|
100
|
+
d = val % 10
|
101
|
+
x << (TENS[val / 10] + ('-' + DIGITS[d] if d != 0).to_s)
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
|
107
|
+
class Integer
|
108
|
+
def to_spoken(translator = USEnglishTranslator.new)
|
109
|
+
translator.to_spoken(self).squeeze(' ').strip
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
if $0 == __FILE__
|
114
|
+
SAMPLES = [ 0, 1, 2, 5, 10, 11, 14, 18, 20, 21, 29, 33, 42, 50, 87, 99,
|
115
|
+
100, 101, 110, 167, 199, 200, 201, 276, 300, 314, 500, 610,
|
116
|
+
1000, 1039, 1347, 2309, 3098, 23501, 32767, 70000, 5480283,
|
117
|
+
2435489238, 234100090000, -42, -2001 ]
|
118
|
+
|
119
|
+
TRANSLATORS = { 'US English' => USEnglishTranslator.new,
|
120
|
+
'Japanese' => JapaneseTranslator.new }
|
121
|
+
|
122
|
+
|
123
|
+
# main
|
124
|
+
TRANSLATORS.each do |lang, translator|
|
125
|
+
puts
|
126
|
+
puts lang
|
127
|
+
puts '-' * lang.length
|
128
|
+
SAMPLES.each do |val|
|
129
|
+
puts "%12d => %s" % [val, val.to_spoken(translator)]
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../test_helper"
|
2
|
+
require File.dirname(__FILE__) + "/number_to_spoken.rb"
|
3
|
+
require 'thread'
|
4
|
+
|
5
|
+
class IndexThreadSafetyTest < Test::Unit::TestCase
|
6
|
+
include Ferret::Index
|
7
|
+
|
8
|
+
INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
|
9
|
+
ITERATIONS = 1000
|
10
|
+
NUM_THREADS = 2
|
11
|
+
ANALYZER = Ferret::Analysis::StandardAnalyzer.new()
|
12
|
+
|
13
|
+
def setup
|
14
|
+
index = Index.new(:path => INDEX_DIR,
|
15
|
+
:create => true,
|
16
|
+
:analyzer => ANALYZER,
|
17
|
+
:default_field => :content)
|
18
|
+
index.close
|
19
|
+
end
|
20
|
+
|
21
|
+
def indexing_thread()
|
22
|
+
index = Index.new(:path => INDEX_DIR,
|
23
|
+
:analyzer => ANALYZER,
|
24
|
+
:default_field => :content)
|
25
|
+
|
26
|
+
ITERATIONS.times do
|
27
|
+
choice = rand()
|
28
|
+
|
29
|
+
if choice > 0.98
|
30
|
+
do_optimize(index)
|
31
|
+
elsif choice > 0.7
|
32
|
+
do_delete_doc(index)
|
33
|
+
elsif choice > 0.5
|
34
|
+
do_search(index)
|
35
|
+
else
|
36
|
+
do_add_doc(index)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def do_optimize(index)
|
42
|
+
puts "Optimizing the index"
|
43
|
+
index.optimize
|
44
|
+
end
|
45
|
+
|
46
|
+
def do_delete_doc(index)
|
47
|
+
return if index.size == 0
|
48
|
+
doc_num = rand(index.size)
|
49
|
+
puts "Deleting #{doc_num} from index which has#{index.has_deletions? ? "" : " no"} deletions"
|
50
|
+
puts "document was already deleted" if (index.deleted?(doc_num))
|
51
|
+
index.delete(doc_num)
|
52
|
+
end
|
53
|
+
|
54
|
+
def do_add_doc(index)
|
55
|
+
n = rand(0xFFFFFFFF)
|
56
|
+
d = {:id => n, :content => n.to_spoken}
|
57
|
+
puts("Adding #{n}")
|
58
|
+
index << d
|
59
|
+
end
|
60
|
+
|
61
|
+
def do_search(index)
|
62
|
+
n = rand(0xFFFFFFFF)
|
63
|
+
puts("Searching for #{n}")
|
64
|
+
hits = index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
|
65
|
+
puts "Hit for #{n}: #{index[d][:id]} - #{s}"
|
66
|
+
end
|
67
|
+
puts("Searched for #{n}: total = #{hits}")
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_threading
|
71
|
+
threads = []
|
72
|
+
NUM_THREADS.times do
|
73
|
+
threads << Thread.new { indexing_thread }
|
74
|
+
end
|
75
|
+
|
76
|
+
threads.each {|t| t.join}
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../test_helper"
|
2
|
+
require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
|
3
|
+
require 'thread'
|
4
|
+
|
5
|
+
class ThreadSafetyTest
|
6
|
+
include Ferret::Index
|
7
|
+
include Ferret::Search
|
8
|
+
include Ferret::Store
|
9
|
+
include Ferret::Document
|
10
|
+
|
11
|
+
def initialize(options)
|
12
|
+
@options = options
|
13
|
+
end
|
14
|
+
|
15
|
+
INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
|
16
|
+
ANALYZER = Ferret::Analysis::Analyzer.new()
|
17
|
+
ITERATIONS = 19
|
18
|
+
@@searcher = nil
|
19
|
+
|
20
|
+
def run_index_thread(writer)
|
21
|
+
reopen_interval = 30 + rand(60)
|
22
|
+
|
23
|
+
use_compound_file = false
|
24
|
+
|
25
|
+
(400*ITERATIONS).times do |i|
|
26
|
+
d = Document.new()
|
27
|
+
n = rand(0xFFFFFFFF)
|
28
|
+
d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
|
29
|
+
d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
|
30
|
+
puts("Adding #{n}")
|
31
|
+
|
32
|
+
# Switch between single and multiple file segments
|
33
|
+
use_compound_file = (rand < 0.5)
|
34
|
+
writer.use_compound_file = use_compound_file
|
35
|
+
|
36
|
+
writer << d
|
37
|
+
|
38
|
+
if (i % reopen_interval == 0)
|
39
|
+
writer.close()
|
40
|
+
writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
writer.close()
|
45
|
+
rescue => e
|
46
|
+
puts e
|
47
|
+
puts e.backtrace
|
48
|
+
raise e
|
49
|
+
end
|
50
|
+
|
51
|
+
def run_search_thread(use_global)
|
52
|
+
reopen_interval = 10 + rand(20)
|
53
|
+
|
54
|
+
unless use_global
|
55
|
+
searcher = IndexSearcher.new(INDEX_DIR)
|
56
|
+
end
|
57
|
+
|
58
|
+
(50*ITERATIONS).times do |i|
|
59
|
+
search_for(rand(0xFFFFFFFF), (searcher.nil? ? @@searcher : searcher))
|
60
|
+
if (i%reopen_interval == 0)
|
61
|
+
if (searcher == nil)
|
62
|
+
@@searcher = IndexSearcher.new(INDEX_DIR)
|
63
|
+
else
|
64
|
+
searcher.close()
|
65
|
+
searcher = IndexSearcher.new(INDEX_DIR)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
rescue => e
|
70
|
+
puts e
|
71
|
+
puts e.backtrace
|
72
|
+
raise e
|
73
|
+
end
|
74
|
+
|
75
|
+
def search_for(n, searcher)
|
76
|
+
puts("Searching for #{n}")
|
77
|
+
hits =
|
78
|
+
searcher.search(Ferret::QueryParser.parse(n.to_spoken, "contents", :analyzer => ANALYZER),
|
79
|
+
:num_docs => 3)
|
80
|
+
puts("Search for #{n}: total = #{hits.size}")
|
81
|
+
hits.each do |d, s|
|
82
|
+
puts "Hit for #{n}: #{searcher.reader.get_document(d)["id"]} - #{s}"
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def run_test_threads
|
87
|
+
|
88
|
+
threads = []
|
89
|
+
unless @options[:read_only]
|
90
|
+
writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER,
|
91
|
+
:create => !@options[:add])
|
92
|
+
|
93
|
+
threads << Thread.new { run_index_thread(writer) }
|
94
|
+
|
95
|
+
sleep(1)
|
96
|
+
end
|
97
|
+
|
98
|
+
threads << Thread.new { run_search_thread(false)}
|
99
|
+
|
100
|
+
@@searcher = IndexSearcher.new(INDEX_DIR)
|
101
|
+
threads << Thread.new { run_search_thread(true)}
|
102
|
+
|
103
|
+
threads << Thread.new { run_search_thread(true)}
|
104
|
+
|
105
|
+
threads.each {|t| t.join}
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
|
110
|
+
if $0 == __FILE__
|
111
|
+
require 'optparse'
|
112
|
+
|
113
|
+
OPTIONS = {
|
114
|
+
:all => false,
|
115
|
+
:read_only => false,
|
116
|
+
}
|
117
|
+
|
118
|
+
ARGV.options do |opts|
|
119
|
+
script_name = File.basename($0)
|
120
|
+
opts.banner = "Usage: ruby #{script_name} [options]"
|
121
|
+
|
122
|
+
opts.separator ""
|
123
|
+
|
124
|
+
opts.on("-r", "--read-only", "Read Only.") { OPTIONS[:all] = true }
|
125
|
+
opts.on("-a", "--all", "All.") { OPTIONS[:read_only] = true }
|
126
|
+
|
127
|
+
opts.separator ""
|
128
|
+
|
129
|
+
opts.on("-h", "--help",
|
130
|
+
"Show this help message.") { puts opts; exit }
|
131
|
+
|
132
|
+
opts.parse!
|
133
|
+
end
|
134
|
+
|
135
|
+
tst = ThreadSafetyTest.new(OPTIONS)
|
136
|
+
tst.run_test_threads
|
137
|
+
end
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -766,4 +766,12 @@ class IndexTest < Test::Unit::TestCase
|
|
766
766
|
|
767
767
|
index.close
|
768
768
|
end
|
769
|
+
|
770
|
+
def test_changing_analyzer
|
771
|
+
index = Ferret::I.new
|
772
|
+
a = Ferret::Analysis::WhiteSpaceAnalyzer.new(false)
|
773
|
+
index.add_document({:content => "Content With Capitals"}, a)
|
774
|
+
tv = index.reader.term_vector(0, :content)
|
775
|
+
assert_equal("Capitals", tv.terms[0].text)
|
776
|
+
end
|
769
777
|
end
|
@@ -16,8 +16,8 @@ class SearchAndSortTest < Test::Unit::TestCase
|
|
16
16
|
{:x => "findall", :string => "c", :int => "5", :float => "0.1"}, # 3 3
|
17
17
|
{:x => "findall", :string => "e", :int => "2", :float => "0.001"}, # 5 1
|
18
18
|
{:x => "findall", :string => "g", :int => "1", :float => "1.0"}, # 3 3
|
19
|
-
{:x => "findall", :string =>
|
20
|
-
{:x => "findall", :string => "
|
19
|
+
{:x => "findall", :string => nil, :int => "3", :float => "0.0001"}, # 6 2
|
20
|
+
{:x => "findall", :string => "", :int => "4", :float => "10.0"}, # 4 0
|
21
21
|
{:x => "findall", :string => "h", :int => "5", :float => "0.00001"}, # 7 3
|
22
22
|
{:x => "findall", :string => "f", :int => "2", :float => "100.0"}, # 5 1
|
23
23
|
{:x => "findall", :string => "d", :int => "3", :float => "1000.0"}, # 6 2
|
@@ -145,7 +145,7 @@ class SearchAndSortTest < Test::Unit::TestCase
|
|
145
145
|
|
146
146
|
## str
|
147
147
|
sf_str = SortField.new(:string, {:type => :string})
|
148
|
-
do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], [sf_str, SortField::SCORE])
|
148
|
+
do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,5,4], [sf_str, SortField::SCORE])
|
149
149
|
do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], "string")
|
150
150
|
|
151
151
|
## auto
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.10.9
|
7
|
-
date: 2006-
|
6
|
+
version: 0.10.10
|
7
|
+
date: 2006-10-08 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -198,6 +198,9 @@ files:
|
|
198
198
|
- test/unit/search/tc_search_and_sort.rb
|
199
199
|
- test/unit/search/tm_searcher.rb
|
200
200
|
- test/unit/query_parser/tc_query_parser.rb
|
201
|
+
- test/threading/thread_safety_index_test.rb
|
202
|
+
- test/threading/thread_safety_test.rb
|
203
|
+
- test/threading/number_to_spoken.rb
|
201
204
|
test_files: []
|
202
205
|
|
203
206
|
rdoc_options:
|