ferret 0.10.9 → 0.10.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/index.c +80 -89
- data/ext/index.h +1 -0
- data/ext/q_fuzzy.c +1 -1
- data/ext/r_index.c +50 -1
- data/ext/r_search.c +25 -7
- data/ext/search.c +7 -6
- data/ext/sort.c +4 -4
- data/lib/ferret/index.rb +15 -8
- data/lib/ferret_version.rb +1 -1
- data/test/threading/number_to_spoken.rb +132 -0
- data/test/threading/thread_safety_index_test.rb +78 -0
- data/test/threading/thread_safety_test.rb +137 -0
- data/test/unit/index/tc_index.rb +8 -0
- data/test/unit/search/tc_search_and_sort.rb +3 -3
- metadata +5 -2
data/ext/index.c
CHANGED
@@ -1552,7 +1552,7 @@ TermEnum *ste_new(InStream *is, SegmentFieldIndex *sfi)

 typedef struct TermEnumWrapper
 {
-    int
+    int index;
     TermEnum *te;
     int *doc_map;
     IndexReader *ir;
@@ -1567,13 +1567,16 @@ typedef struct MultiTermEnum
     TermEnumWrapper *tews;
     int size;
     int **field_num_map;
+    int ti_cnt;
+    TermInfo *tis;
+    int *ti_indexes;
 } MultiTermEnum;

 static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2)
 {
     int cmpres = strcmp(tew1->term, tew2->term);
     if (cmpres == 0) {
-        return tew1->
+        return tew1->index < tew2->index;
     }
     else {
         return cmpres < 0;
@@ -1617,10 +1620,10 @@ static void tew_destroy(TermEnumWrapper *tew)
     tew->te->close(tew->te);
 }

-TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int
+TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int index, TermEnum *te,
                            IndexReader *ir)
 {
-    tew->
+    tew->index = index;
     tew->ir = ir;
     tew->te = te;
     tew->term = te->curr_term;
@@ -1646,9 +1649,12 @@ static char *mte_next(TermEnum *te)

     te->curr_ti.doc_freq = 0;

+    MTE(te)->ti_cnt = 0;
     while ((top != NULL) && (strcmp(te->curr_term, top->term) == 0)) {
         pq_pop(MTE(te)->tew_queue);
         te->curr_ti.doc_freq += top->te->curr_ti.doc_freq;/* increment freq */
+        MTE(te)->ti_indexes[MTE(te)->ti_cnt] = top->index;
+        MTE(te)->tis[MTE(te)->ti_cnt++] = top->te->curr_ti;
         if (tew_next(top)) {
             pq_push(MTE(te)->tew_queue, top); /* restore queue */
         }
@@ -1711,6 +1717,8 @@ static void mte_close(TermEnum *te)
         tew_destroy(&(MTE(te)->tews[i]));
     }
     free(MTE(te)->tews);
+    free(MTE(te)->tis);
+    free(MTE(te)->ti_indexes);
     pq_destroy(MTE(te)->tew_queue);
     free(te);
 }
@@ -1718,7 +1726,6 @@ static void mte_close(TermEnum *te)
 TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
 {
     IndexReader **readers = mr->sub_readers;
-    int *starts = mr->starts;
     int r_cnt = mr->r_cnt;
     int i;
     IndexReader *reader;
@@ -1731,6 +1738,8 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
     TE(mte)->close = &mte_close;

     mte->size = r_cnt;
+    mte->tis = ALLOC_AND_ZERO_N(TermInfo, r_cnt);
+    mte->ti_indexes = ALLOC_AND_ZERO_N(int, r_cnt);
     mte->tews = ALLOC_AND_ZERO_N(TermEnumWrapper, r_cnt);
     mte->tew_queue = pq_new(r_cnt, (lt_ft)&tew_lt, (free_ft)NULL);
     mte->field_num_map = mr->field_num_map;
@@ -1750,7 +1759,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
             sub_te = reader->terms(reader, fnum);
         }

-        tew = tew_setup(&(mte->tews[i]),
+        tew = tew_setup(&(mte->tews[i]), i, sub_te, reader);
         if (((term == NULL) && tew_next(tew))
             || (tew->term && (tew->term[0] != '\0'))) {
             pq_push(mte->tew_queue, tew); /* initialize queue */
@@ -1759,7 +1768,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
             /* add the term_enum_wrapper just in case */
             sub_te = reader->terms(reader, 0);
             sub_te->field_num = -1;
-            tew_setup(&(mte->tews[i]),
+            tew_setup(&(mte->tews[i]), i, sub_te, reader);
         }
     }

@@ -2386,45 +2395,29 @@ typedef struct MultiTermDocEnum
 {
     TermDocEnum tde;
     int *starts;
-    char *term;
-    int field_num;
     int base;
     int ptr;
     int ir_cnt;
-
+    char *state;
+    TermEnum *te;
     IndexReader **irs;
     TermDocEnum **irs_tde;
     TermDocEnum *curr_tde;
-    TermDocEnum *(*reader_tde_i)(IndexReader *ir);
 } MultiTermDocEnum;

-static TermDocEnum *
-{
-    return ir->term_docs(ir);
-}
-
-static TermDocEnum *mtde_get_tde_i(MultiTermDocEnum *mtde, int i)
+static TermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
 {
-
-
+    mtde->ptr++;
+    while (mtde->ptr < mtde->ir_cnt && !mtde->state[mtde->ptr]) {
+        mtde->ptr++;
+    }
+    if (mtde->ptr >= mtde->ir_cnt) {
+        return mtde->curr_tde = NULL;
     }
     else {
-
-
-
-
-        if (fnum >= 0) {
-            TermDocEnum *tde = mtde->irs_tde[i];
-            if (tde == NULL) {
-                tde = mtde->irs_tde[i] = mtde->reader_tde_i(mtde->irs[i]);
-            }
-
-            tde->seek(tde, fnum, mtde->term);
-            return tde;
-        }
-        else {
-            return NULL;
-        }
+        TermDocEnum *tde = mtde->curr_tde = mtde->irs_tde[mtde->ptr];
+        mtde->base = mtde->starts[mtde->ptr];
+        return tde;
     }
 }

@@ -2435,30 +2428,35 @@ static TermDocEnum *mtde_get_tde_i(MultiTermDocEnum *mtde, int i)
     }\
 } while (0)

-static void
+static void mtde_seek_te(TermDocEnum *tde, TermEnum *te)
 {
+    int i;
     MultiTermDocEnum *mtde = MTDE(tde);
-
-
+    memset(mtde->state, 0, mtde->ir_cnt);
+    for (i = MTE(te)->ti_cnt - 1; i >= 0; i--) {
+        int index = MTE(te)->ti_indexes[i];
+        TermDocEnum *tde = mtde->irs_tde[index];
+        mtde->state[index] = 1;
+        if (tde->close == stde_close) {
+            stde_seek_ti(STDE(tde), MTE(te)->tis + i);
+        } else if (tde->close == stpe_close) {
+            stpe_seek_ti(STDE(tde), MTE(te)->tis + i);
+        } else {
+            tde->seek(tde, MTE(te)->tews[index].te->field_num, te->curr_term);
+        }
     }
-    mtde->term = estrdup(term);
-    mtde->field_num = field_num;
     mtde->base = 0;
-    mtde->ptr =
-    mtde
+    mtde->ptr = -1;
+    mtde_next_tde(mtde);
 }

-static void
+static void mtde_seek(TermDocEnum *tde, int field_num, const char *term)
 {
     MultiTermDocEnum *mtde = MTDE(tde);
-
-
-
-
-    mtde->field_num = te->field_num;
-    mtde->base = 0;
-    mtde->ptr = 0;
-    mtde->curr_tde = NULL;
+    TermEnum *te = mtde->te;
+    te->set_field(te, field_num);
+    te->skip_to(te, term);
+    mtde_seek_te(tde, te);
 }

 static int mtde_doc_num(TermDocEnum *tde)
@@ -2479,10 +2477,7 @@ static bool mtde_next(TermDocEnum *tde)
     if (mtde->curr_tde != NULL && mtde->curr_tde->next(mtde->curr_tde)) {
         return true;
     }
-    else if (mtde
-        mtde->base = mtde->starts[mtde->ptr];
-        mtde->curr_tde = mtde_get_tde_i(mtde, mtde->ptr);
-        mtde->ptr++;
+    else if (mtde_next_tde(mtde)) {
         return mtde_next(tde);
     }
     else {
@@ -2495,19 +2490,11 @@ static int mtde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
     int i, end = 0, last_end = 0, b;
     MultiTermDocEnum *mtde = MTDE(tde);
     while (true) {
-
-            if (mtde->ptr < mtde->ir_cnt) { /* try next segment */
-                mtde->base = mtde->starts[mtde->ptr];
-                mtde->curr_tde = mtde_get_tde_i(mtde, mtde->ptr++);
-            }
-            else {
-                return end;
-            }
-        }
+        if (mtde->curr_tde == NULL) return end;
         end += mtde->curr_tde->read(mtde->curr_tde, docs + last_end,
                                     freqs + last_end, req_num - last_end);
         if (end == last_end) { /* none left in segment */
-            mtde
+            if (!mtde_next_tde(mtde)) return end;
         }
         else { /* got some */
             b = mtde->base; /* adjust doc numbers */
@@ -2528,19 +2515,15 @@ static bool mtde_skip_to(TermDocEnum *tde, int target_doc_num)
 {
     MultiTermDocEnum *mtde = MTDE(tde);
     TermDocEnum *curr_tde;
-    while (
-
-        if (curr_tde && (target_doc_num < mtde->starts[mtde->ptr]) &&
+    while (NULL != (curr_tde = mtde->curr_tde)) {
+        if (target_doc_num < mtde->starts[mtde->ptr + 1] &&
             (curr_tde->skip_to(curr_tde, target_doc_num - mtde->base))) {
             return true;
        }

-        mtde
-        mtde->curr_tde = mtde_get_tde_i(mtde, mtde->ptr);
-        mtde->ptr++;
+        mtde_next_tde(mtde);
     }

-    curr_tde = mtde->curr_tde;
     if (curr_tde) {
         return curr_tde->skip_to(curr_tde, target_doc_num - mtde->base);
     }
@@ -2554,20 +2537,18 @@ static void mtde_close(TermDocEnum *tde)
     MultiTermDocEnum *mtde = MTDE(tde);
     TermDocEnum *tmp_tde;
     int i = mtde->ir_cnt;
+    mtde->te->close(mtde->te);
     while (i > 0) {
         i--;
-
-
-        }
-    }
-    if (mtde->term != NULL) {
-        free(mtde->term);
+        tmp_tde = mtde->irs_tde[i];
+        tmp_tde->close(tmp_tde);
     }
     free(mtde->irs_tde);
+    free(mtde->state);
     free(tde);
 }

-TermDocEnum *
+TermDocEnum *mtxe_new(MultiReader *mr)
 {
     MultiTermDocEnum *mtde = ALLOC_AND_ZERO(MultiTermDocEnum);
     TermDocEnum *tde = TDE(mtde);
@@ -2578,28 +2559,34 @@ TermDocEnum *mtde_new(MultiReader *mr)
     tde->next = &mtde_next;
     tde->read = &mtde_read;
     tde->skip_to = &mtde_skip_to;
-    tde->next_position = NULL;
     tde->close = &mtde_close;

+    mtde->state = ALLOC_AND_ZERO_N(char, mr->r_cnt);
+    mtde->te = ((IndexReader *)mr)->terms((IndexReader *)mr, 0);
     mtde->starts = mr->starts;
     mtde->ir_cnt = mr->r_cnt;
     mtde->irs = mr->sub_readers;
-    mtde->field_num_map = mr->field_num_map;
     mtde->irs_tde = ALLOC_AND_ZERO_N(TermDocEnum *, mr->r_cnt);
-    mtde->reader_tde_i = &mtde_reader_tde_i;

     return tde;
 }

+TermDocEnum *mtde_new(MultiReader *mr)
+{
+    int i;
+    TermDocEnum *tde = mtxe_new(mr);
+    tde->next_position = NULL;
+    for (i = mr->r_cnt - 1; i >= 0; i--) {
+        IndexReader *ir = mr->sub_readers[i];
+        MTDE(tde)->irs_tde[i] = ir->term_docs(ir);
+    }
+    return tde;
+}
+
 /****************************************************************************
  * MultiTermPosEnum
  ****************************************************************************/

-TermDocEnum *mtpe_reader_tde_i(IndexReader *ir)
-{
-    return ir->term_positions(ir);
-}
-
 int mtpe_next_position(TermDocEnum *tde)
 {
     CHECK_CURR_TDE("next_position");
@@ -2608,9 +2595,13 @@ int mtpe_next_position(TermDocEnum *tde)

 TermDocEnum *mtpe_new(MultiReader *mr)
 {
-
+    int i;
+    TermDocEnum *tde = mtxe_new(mr);
     tde->next_position = &mtpe_next_position;
-
+    for (i = mr->r_cnt - 1; i >= 0; i--) {
+        IndexReader *ir = mr->sub_readers[i];
+        MTDE(tde)->irs_tde[i] = ir->term_positions(ir);
+    }
     return tde;
 }

data/ext/index.h
CHANGED
@@ -378,6 +378,7 @@ struct TermDocEnum
 {
     void (*seek)(TermDocEnum *tde, int field_num, const char *term);
     void (*seek_te)(TermDocEnum *tde, TermEnum *te);
+    void (*seek_ti)(TermDocEnum *tde, TermInfo *ti);
     int  (*doc_num)(TermDocEnum *tde);
     int  (*freq)(TermDocEnum *tde);
     bool (*next)(TermDocEnum *tde);
data/ext/q_fuzzy.c
CHANGED
data/ext/r_index.c
CHANGED
@@ -564,6 +564,19 @@ frt_fis_to_s(VALUE self)
     free(fis_s);
     return rfis_s;
 }
+
+/*
+ *  call-seq:
+ *     fis.size -> int
+ *
+ *  Return the number of fields in the FieldInfos object.
+ */
+static VALUE
+frt_fis_size(VALUE self)
+{
+    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
+    return INT2FIX(fis->size);
+}

 /*
  *  call-seq:
@@ -2225,7 +2238,7 @@ frt_ir_get_doc(int argc, VALUE *argv, VALUE self)
     pos = (pos < 0) ? (max + pos) : pos;
     if (pos < 0 || pos >= max) {
         rb_raise(rb_eArgError, ":%d is out of range [%d..%d] for "
-                 "
+                 "IndexReader#[]", pos, 0, max,
                  rb_id2name(SYM2ID(argv)));
     }
     return frt_get_lazy_doc(ir->get_lazy_doc(ir, pos));
@@ -2425,6 +2438,25 @@ frt_ir_terms_from(VALUE self, VALUE rfield, VALUE rterm)
                                     StringValuePtr(rterm)));
 }

+/*
+ *  call-seq:
+ *     index_reader.term_count(field) -> int
+ *
+ *  Same return a count of the number of terms in the field
+ */
+static VALUE
+frt_ir_term_count(VALUE self, VALUE rfield)
+{
+    IndexReader *ir = (IndexReader *)DATA_PTR(self);
+    TermEnum *te = ir_terms(ir, frt_field(rfield));
+    int count = 0;
+    while (te->next(te)) {
+        count++;
+    }
+    te->close(te);
+    return INT2FIX(count);
+}
+
 /*
  *  call-seq:
  *     index_reader.fields -> array of field-names
@@ -2483,6 +2515,19 @@ frt_ir_tk_fields(VALUE self)
     return rfield_names;
 }

+/*
+ *  call-seq:
+ *     index_reader.version -> int
+ *
+ *  Returns the current version of the index reader.
+ */
+static VALUE
+frt_ir_version(VALUE self)
+{
+    IndexReader *ir = (IndexReader *)DATA_PTR(self);
+    return INT2FIX(ir->sis->version);
+}
+
 /****************************************************************************
  *
  * Init Functions
@@ -2708,6 +2753,7 @@ Init_FieldInfos(void)
     rb_define_method(cFieldInfos, "add_field", frt_fis_add_field, -1);
     rb_define_method(cFieldInfos, "each", frt_fis_each, 0);
     rb_define_method(cFieldInfos, "to_s", frt_fis_to_s, 0);
+    rb_define_method(cFieldInfos, "size", frt_fis_size, 0);
     rb_define_method(cFieldInfos, "create_index",
                      frt_fis_create_index, 1);
     rb_define_method(cFieldInfos, "fields", frt_fis_get_fields, 0);
@@ -3188,6 +3234,7 @@ Init_IndexReader(void)
 {
     cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
     rb_define_alloc_func(cIndexReader, frt_data_alloc);
+    /*rb_define_singleton_method(cIndexReader, "version", frt_class_ir_version, 0); */
     rb_define_method(cIndexReader, "initialize", frt_ir_init, 1);
     rb_define_method(cIndexReader, "set_norm", frt_ir_set_norm, 3);
     rb_define_method(cIndexReader, "norms", frt_ir_norms, 1);
@@ -3212,10 +3259,12 @@ Init_IndexReader(void)
     rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 2);
     rb_define_method(cIndexReader, "terms", frt_ir_terms, 1);
     rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 2);
+    rb_define_method(cIndexReader, "term_count", frt_ir_term_count, 1);
     rb_define_method(cIndexReader, "fields", frt_ir_fields, 0);
     rb_define_method(cIndexReader, "field_names", frt_ir_fields, 0);
     rb_define_method(cIndexReader, "field_infos", frt_ir_field_infos, 0);
     rb_define_method(cIndexReader, "tokenized_fields", frt_ir_tk_fields, 0);
+    rb_define_method(cIndexReader, "version", frt_ir_version, 0);
 }

 /* rdoc hack
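
The r_index.c hunks above expose three small read-only accessors to Ruby: FieldInfos#size, IndexReader#term_count(field) and IndexReader#version. A minimal usage sketch — the in-memory index, the :content field and the document text are illustrative only, and the printed values depend on what has been indexed:

    require 'ferret'

    index = Ferret::I.new(:default_field => :content)  # RAM-backed index
    index << {:content => "to be or not to be"}

    reader = index.reader
    puts reader.term_count(:content)   # distinct terms indexed in :content
    puts reader.version                # version stamp of the underlying index
    puts reader.field_infos.size       # number of fields the index knows about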
data/ext/r_search.c
CHANGED
@@ -104,6 +104,7 @@ static ID id_score;
 static ID id_hits;
 static ID id_total_hits;
 static ID id_max_score;
+static ID id_searcher;

 /* Search */
 static VALUE sym_offset;
@@ -152,7 +153,7 @@ frt_get_hit(Hit *hit)
 ****************************************************************************/

 static VALUE
-frt_get_td(TopDocs *td)
+frt_get_td(TopDocs *td, VALUE rsearcher)
 {
     int i;
     VALUE rtop_docs;
@@ -167,6 +168,7 @@ frt_get_td(TopDocs *td)
                               INT2FIX(td->total_hits),
                               hit_ary,
                               rb_float_new((double)td->max_score),
+                              rsearcher,
                               NULL);
     td_destroy(td);
     return rtop_docs;
@@ -174,20 +176,26 @@ frt_get_td(TopDocs *td)

 /*
  *  call-seq:
- *     top_doc.to_s -> string
+ *     top_doc.to_s(field = :id) -> string
  *
  *  Returns a string represention of the top_doc in readable format.
  */
 static VALUE
-frt_td_to_s(VALUE self)
+frt_td_to_s(int argc, VALUE *argv, VALUE self)
 {
     int i;
     VALUE rhits = rb_funcall(self, id_hits, 0);
+    Searcher *sea = (Searcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
     const int len = RARRAY(rhits)->len;
     char *str = ALLOC_N(char, len * 64 + 100);
     char *s = str;
+    char *field = "id";
     VALUE rstr;

+    if (argc) {
+        field = frt_field(argv[0]);
+    }
+
     sprintf(s, "TopDocs: total_hits = %d, max_score = %f [\n",
             FIX2INT(rb_funcall(self, id_total_hits, 0)),
             NUM2DBL(rb_funcall(self, id_max_score, 0)));
@@ -195,10 +203,18 @@ frt_td_to_s(VALUE self)

     for (i = 0; i < len; i++) {
         VALUE rhit = RARRAY(rhits)->ptr[i];
-
-
+        int doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
+        char *value = "";
+        LazyDoc *lzd = sea->get_lazy_doc(sea, doc_id);
+        LazyDocField *lzdf = h_get(lzd->field_dict, field);
+        if (NULL != lzdf) {
+            value = lazy_df_get_data(lzdf, 0);
+        }
+
+        sprintf(s, "\t%d \"%s\": %f\n", doc_id, value,
                 NUM2DBL(rb_funcall(rhit, id_score, 0)));
         s += strlen(s);
+        lazy_doc_close(lzd);
     }

     sprintf(s, "]\n");
@@ -2388,7 +2404,7 @@ frt_sea_search(int argc, VALUE *argv, VALUE self)
     Query *query;
     rb_scan_args(argc, argv, "11", &rquery, &roptions);
     Data_Get_Struct(rquery, Query, query);
-    return frt_get_td(frt_sea_search_internal(query, roptions, sea));
+    return frt_get_td(frt_sea_search_internal(query, roptions, sea), self);
 }

 /*
@@ -2760,13 +2776,15 @@ Init_TopDocs(void)
                               "total_hits",
                               "hits",
                               "max_score",
+                              "searcher",
                               NULL);
     rb_set_class_path(cTopDocs, mSearch, td_class);
     rb_const_set(mSearch, rb_intern(td_class), cTopDocs);
-    rb_define_method(cTopDocs, "to_s", frt_td_to_s,
+    rb_define_method(cTopDocs, "to_s", frt_td_to_s, -1);
     id_hits = rb_intern("hits");
     id_total_hits = rb_intern("total_hits");
     id_max_score = rb_intern("max_score");
+    id_searcher = rb_intern("searcher");
 }

 /*
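
Because a TopDocs now carries the searcher that produced it, to_s can print a stored field for each hit instead of a bare document number, and it takes an optional field argument that defaults to :id. A short sketch, assuming an Index built as in the previous example with a stored :id field:

    top_docs = index.search("ruby")   # Index#search returns a TopDocs
    puts top_docs.to_s                # hits labelled with their :id field
    puts top_docs.to_s(:content)      # or labelled with another stored field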
data/ext/search.c
CHANGED
@@ -122,11 +122,12 @@ static void hit_pq_down(PriorityQueue *pq)
 static Hit *hit_pq_pop(PriorityQueue *pq)
 {
     if (pq->size > 0) {
-        Hit
-
-
+        Hit **heap = (Hit **)pq->heap;
+        Hit *result = heap[1];       /* save first value */
+        heap[1] = heap[pq->size];    /* move last to first */
+        heap[pq->size] = NULL;
         pq->size--;
-        hit_pq_down(pq);
+        hit_pq_down(pq);             /* adjust heap */
         return result;
     }
     else {
@@ -1079,8 +1080,8 @@ static TopDocs *isea_search_w(Searcher *self,
         for (i = num_docs - 1; i >= 0; i--) {
             score_docs[i] = hq_pop(hq);
             /*
-
-
+            printf("score_docs[i][%d] = [%ld] => %d-->%f\n", i,
+                   score_docs[i], score_docs[i]->doc, score_docs[i]->score);
             */
         }
     }
data/ext/sort.c
CHANGED
@@ -426,8 +426,8 @@ int sf_string_compare(void *index, Hit *hit1, Hit *hit2)
     char *s2 = ((StringIndex *)index)->values[
         ((StringIndex *)index)->index[hit2->doc]];

-    if (s1 == NULL) return
-    if (s2 == NULL) return 1;
+    if (s1 == NULL) return s2 ? 1 : 0;
+    if (s2 == NULL) return -1;

 #ifdef POSH_OS_WIN32
     return strcmp(s1, s2);
@@ -874,8 +874,8 @@ bool fdshq_lt(FieldDoc *fd1, FieldDoc *fd2)
     do {
         char *s1 = cmps1[i].val.s;
         char *s2 = cmps2[i].val.s;
-        if (s1 == NULL) c = s2 ?
-        else if (s2 == NULL) c = 1;
+        if (s1 == NULL) c = s2 ? 1 : 0;
+        else if (s2 == NULL) c = -1;
 #ifdef POSH_OS_WIN32
         else c = strcmp(s1, s2);
 #else
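
Both string comparators now order a document with no value in the sort field after documents that do have one (two missing values compare equal), which is what the updated expectations in tc_search_and_sort.rb further down encode. A sketch of a sort that exercises this, using the same field names as that test and assuming Index#search forwards the :sort option to the searcher:

    sf_str = Ferret::Search::SortField.new(:string, :type => :string)
    top_docs = index.search("findall",
                            :sort => [sf_str, Ferret::Search::SortField::SCORE])
    # documents whose :string field is nil now come after the rest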
data/lib/ferret/index.rb
CHANGED
@@ -179,11 +179,13 @@ module Ferret::Index
     # Alternatively you may want to use the HTML entity
     # … or the UTF-8 string "\342\200\246".
     def highlight(query, doc_id, options = {})
-
-
-
-
-
+      @dir.synchronize do
+        ensure_searcher_open()
+        @searcher.highlight(do_process_query(query),
+                            doc_id,
+                            options[:field]||@options[:default_field],
+                            options)
+      end
     end

     # Closes this index by closing its associated reader and writer objects.
@@ -273,9 +275,14 @@ module Ferret::Index
       end
       ensure_writer_open()

-
-
-
+      if analyzer
+        old_analyzer = @writer.analyzer
+        @writer.analyzer = analyzer
+        @writer.add_document(doc)
+        @writer.analyzer = old_analyzer
+      else
+        @writer.add_document(doc)
+      end

       flush() if @auto_flush
     end
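
The first hunk wraps Index#highlight in @dir.synchronize so it is safe alongside concurrent writers; the second lets Index#add_document take an optional analyzer that overrides the writer's analyzer for that one document and then restores the original. A small usage sketch, mirroring the new test_changing_analyzer test later in this diff:

    index = Ferret::I.new
    wsa = Ferret::Analysis::WhiteSpaceAnalyzer.new(false)  # false => keep case
    index.add_document({:content => "Content With Capitals"}, wsa)
    index << {:content => "indexed with the default analyzer again"}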
data/lib/ferret_version.rb
CHANGED
data/test/threading/number_to_spoken.rb
ADDED
@@ -0,0 +1,132 @@
+# Author: Matthew D Moss
+#
+# Writtern for ruby quiz #25
+#
+class JapaneseTranslator
+  # My knowledge of counting Japanese is limited, so this may not
+  # be entirely correct; in particular, I don't know what rules
+  # to follow after 'hyaku man' (1,000,000).
+  # I also combine a digit with its group, such as 'gohyaku' rather
+  # than 'go hyaku'; I just like reading it better that way.
+
+  DIGITS = %w(zero ichi ni san yon go roku nana hachi kyu)
+  GROUPS = %w(nothingtoseeheremovealong ju hyaku sen)
+  MAN = 10000
+
+  def to_spoken(val)
+    case val <=> 0
+    when -1
+      '- ' + to_spoken(-val)
+    when 0
+      DIGITS[0]
+    else
+      group(val, 0)
+    end
+  end
+
+  private
+
+  def group(val, level)
+    if val >= MAN
+      group(val / MAN, 0) + 'man ' + group(val % MAN, 0)
+    else
+      case val
+      when 0
+        ''
+      when 1
+        level == 0 ? DIGITS[val] : GROUPS[level]
+      when 2...10
+        DIGITS[val] + (GROUPS[level] if level > 0).to_s
+      else
+        group(val / 10, level+1) + ' ' + group(val % 10, level)
+      end
+    end
+  end
+end
+
+
+class USEnglishTranslator
+  # Formal, US English. Optional 'and'. Will not produce things
+  # such as 'twelve hundred' but rather 'one thousand two hundred'.
+  # The use of 'and' is incomplete; it is sometimes missed.
+
+  DIGITS = %w(zero one two three four five six seven eight nine)
+  TEENS = %w(ten eleven twelve thirteen fourteen fifteen sixteen
+             seventeen eighteen nineteen)
+  TENS = %w(hello world twenty thirty forty fifty sixty seventy
+            eighty ninety)
+  GROUPS = %w(thousand million billion trillion quadrillion
+              quintillion sextillion septillion octillion nonillion
+              decillion)
+  K = 1000
+
+  def initialize(conjunction = true)
+    @conjunction = conjunction
+  end
+
+  def to_spoken(val)
+    case val <=> 0
+    when -1
+      'negative ' + to_spoken(-val)
+    when 0
+      DIGITS[0]
+    else
+      group(val, 0).flatten.join(' ')
+    end
+  end
+
+  private
+
+  def group(val, level)
+    x = group(val / K, level + 1) << GROUPS[level] if val >= K
+    x.to_a << under_1000(val % K, level)
+  end
+
+  def under_1000(val, level)
+    x = [DIGITS[val / 100]] << 'hundred' if val >= 100
+    x.to_a << under_100(val % 100, (level == 0 and not x.nil?))
+  end
+
+  def under_100(val, junction)
+    x = [('and' if @conjunction and junction)] # wyf?
+    case val
+    when 0
+      []
+    when 1...10
+      x << DIGITS[val]
+    when 10...20
+      x << TEENS[val - 10]
+    else
+      d = val % 10
+      x << (TENS[val / 10] + ('-' + DIGITS[d] if d != 0).to_s)
+    end
+  end
+end
+
+
+class Integer
+  def to_spoken(translator = USEnglishTranslator.new)
+    translator.to_spoken(self).squeeze(' ').strip
+  end
+end
+
+if $0 == __FILE__
+  SAMPLES = [ 0, 1, 2, 5, 10, 11, 14, 18, 20, 21, 29, 33, 42, 50, 87, 99,
+              100, 101, 110, 167, 199, 200, 201, 276, 300, 314, 500, 610,
+              1000, 1039, 1347, 2309, 3098, 23501, 32767, 70000, 5480283,
+              2435489238, 234100090000, -42, -2001 ]
+
+  TRANSLATORS = { 'US English' => USEnglishTranslator.new,
+                  'Japanese' => JapaneseTranslator.new }
+
+
+  # main
+  TRANSLATORS.each do |lang, translator|
+    puts
+    puts lang
+    puts '-' * lang.length
+    SAMPLES.each do |val|
+      puts "%12d => %s" % [val, val.to_spoken(translator)]
+    end
+  end
+end
data/test/threading/thread_safety_index_test.rb
ADDED
@@ -0,0 +1,78 @@
+require File.dirname(__FILE__) + "/../test_helper"
+require File.dirname(__FILE__) + "/number_to_spoken.rb"
+require 'thread'
+
+class IndexThreadSafetyTest < Test::Unit::TestCase
+  include Ferret::Index
+
+  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
+  ITERATIONS = 1000
+  NUM_THREADS = 2
+  ANALYZER = Ferret::Analysis::StandardAnalyzer.new()
+
+  def setup
+    index = Index.new(:path => INDEX_DIR,
+                      :create => true,
+                      :analyzer => ANALYZER,
+                      :default_field => :content)
+    index.close
+  end
+
+  def indexing_thread()
+    index = Index.new(:path => INDEX_DIR,
+                      :analyzer => ANALYZER,
+                      :default_field => :content)
+
+    ITERATIONS.times do
+      choice = rand()
+
+      if choice > 0.98
+        do_optimize(index)
+      elsif choice > 0.7
+        do_delete_doc(index)
+      elsif choice > 0.5
+        do_search(index)
+      else
+        do_add_doc(index)
+      end
+    end
+  end
+
+  def do_optimize(index)
+    puts "Optimizing the index"
+    index.optimize
+  end
+
+  def do_delete_doc(index)
+    return if index.size == 0
+    doc_num = rand(index.size)
+    puts "Deleting #{doc_num} from index which has#{index.has_deletions? ? "" : " no"} deletions"
+    puts "document was already deleted" if (index.deleted?(doc_num))
+    index.delete(doc_num)
+  end
+
+  def do_add_doc(index)
+    n = rand(0xFFFFFFFF)
+    d = {:id => n, :content => n.to_spoken}
+    puts("Adding #{n}")
+    index << d
+  end
+
+  def do_search(index)
+    n = rand(0xFFFFFFFF)
+    puts("Searching for #{n}")
+    hits = index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
+      puts "Hit for #{n}: #{index[d][:id]} - #{s}"
+    end
+    puts("Searched for #{n}: total = #{hits}")
+  end
+
+  def test_threading
+    threads = []
+    NUM_THREADS.times do
+      threads << Thread.new { indexing_thread }
+    end
+
+    threads.each {|t| t.join}
+  end
+end
data/test/threading/thread_safety_test.rb
ADDED
@@ -0,0 +1,137 @@
+require File.dirname(__FILE__) + "/../test_helper"
+require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
+require 'thread'
+
+class ThreadSafetyTest
+  include Ferret::Index
+  include Ferret::Search
+  include Ferret::Store
+  include Ferret::Document
+
+  def initialize(options)
+    @options = options
+  end
+
+  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
+  ANALYZER = Ferret::Analysis::Analyzer.new()
+  ITERATIONS = 19
+  @@searcher = nil
+
+  def run_index_thread(writer)
+    reopen_interval = 30 + rand(60)
+
+    use_compound_file = false
+
+    (400*ITERATIONS).times do |i|
+      d = Document.new()
+      n = rand(0xFFFFFFFF)
+      d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
+      d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
+      puts("Adding #{n}")
+
+      # Switch between single and multiple file segments
+      use_compound_file = (rand < 0.5)
+      writer.use_compound_file = use_compound_file
+
+      writer << d
+
+      if (i % reopen_interval == 0)
+        writer.close()
+        writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER)
+      end
+    end
+
+    writer.close()
+  rescue => e
+    puts e
+    puts e.backtrace
+    raise e
+  end
+
+  def run_search_thread(use_global)
+    reopen_interval = 10 + rand(20)
+
+    unless use_global
+      searcher = IndexSearcher.new(INDEX_DIR)
+    end
+
+    (50*ITERATIONS).times do |i|
+      search_for(rand(0xFFFFFFFF), (searcher.nil? ? @@searcher : searcher))
+      if (i%reopen_interval == 0)
+        if (searcher == nil)
+          @@searcher = IndexSearcher.new(INDEX_DIR)
+        else
+          searcher.close()
+          searcher = IndexSearcher.new(INDEX_DIR)
+        end
+      end
+    end
+  rescue => e
+    puts e
+    puts e.backtrace
+    raise e
+  end
+
+  def search_for(n, searcher)
+    puts("Searching for #{n}")
+    hits =
+      searcher.search(Ferret::QueryParser.parse(n.to_spoken, "contents", :analyzer => ANALYZER),
+                      :num_docs => 3)
+    puts("Search for #{n}: total = #{hits.size}")
+    hits.each do |d, s|
+      puts "Hit for #{n}: #{searcher.reader.get_document(d)["id"]} - #{s}"
+    end
+  end
+
+  def run_test_threads
+
+    threads = []
+    unless @options[:read_only]
+      writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER,
+                               :create => !@options[:add])
+
+      threads << Thread.new { run_index_thread(writer) }
+
+      sleep(1)
+    end
+
+    threads << Thread.new { run_search_thread(false)}
+
+    @@searcher = IndexSearcher.new(INDEX_DIR)
+    threads << Thread.new { run_search_thread(true)}
+
+    threads << Thread.new { run_search_thread(true)}
+
+    threads.each {|t| t.join}
+  end
+end
+
+
+if $0 == __FILE__
+  require 'optparse'
+
+  OPTIONS = {
+    :all => false,
+    :read_only => false,
+  }
+
+  ARGV.options do |opts|
+    script_name = File.basename($0)
+    opts.banner = "Usage: ruby #{script_name} [options]"
+
+    opts.separator ""
+
+    opts.on("-r", "--read-only", "Read Only.") { OPTIONS[:all] = true }
+    opts.on("-a", "--all", "All.") { OPTIONS[:read_only] = true }
+
+    opts.separator ""
+
+    opts.on("-h", "--help",
+            "Show this help message.") { puts opts; exit }
+
+    opts.parse!
+  end
+
+  tst = ThreadSafetyTest.new(OPTIONS)
+  tst.run_test_threads
+end
data/test/unit/index/tc_index.rb
CHANGED
@@ -766,4 +766,12 @@ class IndexTest < Test::Unit::TestCase

     index.close
   end
+
+  def test_changing_analyzer
+    index = Ferret::I.new
+    a = Ferret::Analysis::WhiteSpaceAnalyzer.new(false)
+    index.add_document({:content => "Content With Capitals"}, a)
+    tv = index.reader.term_vector(0, :content)
+    assert_equal("Capitals", tv.terms[0].text)
+  end
 end
data/test/unit/search/tc_search_and_sort.rb
CHANGED
@@ -16,8 +16,8 @@ class SearchAndSortTest < Test::Unit::TestCase
     {:x => "findall", :string => "c", :int => "5", :float => "0.1"}, # 3 3
     {:x => "findall", :string => "e", :int => "2", :float => "0.001"}, # 5 1
     {:x => "findall", :string => "g", :int => "1", :float => "1.0"}, # 3 3
-    {:x => "findall", :string =>
-    {:x => "findall", :string => "
+    {:x => "findall", :string => nil, :int => "3", :float => "0.0001"}, # 6 2
+    {:x => "findall", :string => "", :int => "4", :float => "10.0"}, # 4 0
     {:x => "findall", :string => "h", :int => "5", :float => "0.00001"}, # 7 3
     {:x => "findall", :string => "f", :int => "2", :float => "100.0"}, # 5 1
     {:x => "findall", :string => "d", :int => "3", :float => "1000.0"}, # 6 2
@@ -145,7 +145,7 @@ class SearchAndSortTest < Test::Unit::TestCase

     ## str
     sf_str = SortField.new(:string, {:type => :string})
-    do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4
+    do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,5,4], [sf_str, SortField::SCORE])
     do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], "string")

     ## auto
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
 specification_version: 1
 name: ferret
 version: !ruby/object:Gem::Version
-  version: 0.10.
-date: 2006-
+  version: 0.10.10
+date: 2006-10-08 00:00:00 +09:00
 summary: Ruby indexing library.
 require_paths:
 - lib
@@ -198,6 +198,9 @@ files:
 - test/unit/search/tc_search_and_sort.rb
 - test/unit/search/tm_searcher.rb
 - test/unit/query_parser/tc_query_parser.rb
+- test/threading/thread_safety_index_test.rb
+- test/threading/thread_safety_test.rb
+- test/threading/number_to_spoken.rb
 test_files: []

 rdoc_options: