whistlepig 0.6 → 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -2
- data/ext/whistlepig/query-parser.lex.c +3 -3
- data/ext/whistlepig/query-parser.tab.c +58 -45
- data/ext/whistlepig/query.c +32 -40
- data/ext/whistlepig/query.h +4 -0
- data/ext/whistlepig/search.c +61 -2
- data/ext/whistlepig/tokenizer.lex.c +13 -13
- metadata +24 -37
data/README
CHANGED
|
@@ -8,7 +8,7 @@ the frills, Whistlepig may be for you.
|
|
|
8
8
|
Whistlepig is written in ANSI C99. It currently provides a C API and Ruby
|
|
9
9
|
bindings.
|
|
10
10
|
|
|
11
|
-
Latest version: 0.6, released 2011-04-28.
|
|
11
|
+
Latest version: 0.7, released 2011-07-19.
|
|
12
12
|
Status: alpha
|
|
13
13
|
News: http://all-thing.net/label/whistlepig/
|
|
14
14
|
Homepage: http://masanjin.net/whistlepig/
|
|
@@ -16,7 +16,7 @@ Latest version: 0.6, released 2011-04-28.
|
|
|
16
16
|
|
|
17
17
|
= Getting it
|
|
18
18
|
|
|
19
|
-
Tarball: http://masanjin.net/whistlepig/whistlepig-0.6.tar.gz
|
|
19
|
+
Tarball: http://masanjin.net/whistlepig/whistlepig-0.7.tar.gz
|
|
20
20
|
Rubygem: gem install whistlepig
|
|
21
21
|
Git: git clone git://github.com/wmorgan/whistlepig.git
|
|
22
22
|
|
|
@@ -389,7 +389,7 @@ static yyconst struct yy_trans_info yy_transition[1808] =
|
|
|
389
389
|
{ 27, 516 }, { 28, 516 }, { 29, 516 }, { 30, 516 }, { 31, 516 },
|
|
390
390
|
{ 32, 526 }, { 33, 516 }, { 34, 774 }, { 35, 516 }, { 36, 516 },
|
|
391
391
|
{ 37, 516 }, { 38, 516 }, { 39, 516 }, { 40, 774 }, { 41, 774 },
|
|
392
|
-
{ 42,
|
|
392
|
+
{ 42, 774 }, { 43, 516 }, { 44, 516 }, { 45, 774 }, { 46, 516 },
|
|
393
393
|
|
|
394
394
|
{ 47, 516 }, { 48, 516 }, { 49, 516 }, { 50, 516 }, { 51, 516 },
|
|
395
395
|
{ 52, 516 }, { 53, 516 }, { 54, 516 }, { 55, 516 }, { 56, 516 },
|
|
@@ -446,7 +446,7 @@ static yyconst struct yy_trans_info yy_transition[1808] =
|
|
|
446
446
|
{ 29, 258 }, { 30, 258 }, { 31, 258 }, { 32, 268 }, { 33, 258 },
|
|
447
447
|
{ 34, 516 }, { 35, 258 }, { 36, 258 }, { 37, 258 }, { 38, 258 },
|
|
448
448
|
|
|
449
|
-
{ 39, 258 }, { 40, 516 }, { 41, 516 }, { 42,
|
|
449
|
+
{ 39, 258 }, { 40, 516 }, { 41, 516 }, { 42, 516 }, { 43, 258 },
|
|
450
450
|
{ 44, 258 }, { 45, 516 }, { 46, 258 }, { 47, 258 }, { 48, 258 },
|
|
451
451
|
{ 49, 258 }, { 50, 258 }, { 51, 258 }, { 52, 258 }, { 53, 258 },
|
|
452
452
|
{ 54, 258 }, { 55, 258 }, { 56, 258 }, { 57, 258 }, { 58, 516 },
|
|
@@ -764,7 +764,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[6] =
|
|
|
764
764
|
} \
|
|
765
765
|
}
|
|
766
766
|
|
|
767
|
-
/* for the first char, everything is allowed except ()"
|
|
767
|
+
/* for the first char, everything is allowed except ()"-~:* */
|
|
768
768
|
/* inside a word, everything is allowed except ()": */
|
|
769
769
|
#line 770 "query-parser.lex.c"
|
|
770
770
|
|
|
@@ -384,16 +384,16 @@ union yyalloc
|
|
|
384
384
|
/* YYFINAL -- State number of the termination state. */
|
|
385
385
|
#define YYFINAL 3
|
|
386
386
|
/* YYLAST -- Last index in YYTABLE. */
|
|
387
|
-
#define YYLAST
|
|
387
|
+
#define YYLAST 31
|
|
388
388
|
|
|
389
389
|
/* YYNTOKENS -- Number of terminals. */
|
|
390
|
-
#define YYNTOKENS
|
|
390
|
+
#define YYNTOKENS 12
|
|
391
391
|
/* YYNNTS -- Number of nonterminals. */
|
|
392
392
|
#define YYNNTS 8
|
|
393
393
|
/* YYNRULES -- Number of rules. */
|
|
394
|
-
#define YYNRULES
|
|
394
|
+
#define YYNRULES 20
|
|
395
395
|
/* YYNSTATES -- Number of states. */
|
|
396
|
-
#define YYNSTATES
|
|
396
|
+
#define YYNSTATES 30
|
|
397
397
|
|
|
398
398
|
/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
|
|
399
399
|
#define YYUNDEFTOK 2
|
|
@@ -408,8 +408,8 @@ static const yytype_uint8 yytranslate[] =
|
|
|
408
408
|
0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
409
409
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
410
410
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
411
|
-
2, 2, 2, 2,
|
|
412
|
-
|
|
411
|
+
2, 2, 2, 2, 9, 2, 2, 2, 2, 2,
|
|
412
|
+
10, 11, 8, 2, 2, 6, 2, 2, 2, 2,
|
|
413
413
|
2, 2, 2, 2, 2, 2, 2, 2, 5, 2,
|
|
414
414
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
415
415
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
@@ -439,25 +439,27 @@ static const yytype_uint8 yytranslate[] =
|
|
|
439
439
|
static const yytype_uint8 yyprhs[] =
|
|
440
440
|
{
|
|
441
441
|
0, 0, 3, 5, 6, 9, 12, 16, 20, 22,
|
|
442
|
-
24, 26, 30, 34, 38, 41, 44,
|
|
442
|
+
24, 26, 30, 34, 38, 41, 44, 46, 50, 52,
|
|
443
|
+
55
|
|
443
444
|
};
|
|
444
445
|
|
|
445
446
|
/* YYRHS -- A `-1'-separated list of the rules' RHS. */
|
|
446
447
|
static const yytype_int8 yyrhs[] =
|
|
447
448
|
{
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
3, -1,
|
|
451
|
-
3, 5,
|
|
452
|
-
-1, 7, 3, -1, 8,
|
|
453
|
-
|
|
449
|
+
13, 0, -1, 14, -1, -1, 14, 16, -1, 14,
|
|
450
|
+
15, -1, 16, 4, 16, -1, 15, 4, 16, -1,
|
|
451
|
+
3, -1, 19, -1, 17, -1, 3, 5, 3, -1,
|
|
452
|
+
3, 5, 19, -1, 3, 5, 17, -1, 6, 16,
|
|
453
|
+
-1, 7, 3, -1, 8, -1, 9, 18, 9, -1,
|
|
454
|
+
3, -1, 18, 3, -1, 10, 14, 11, -1
|
|
454
455
|
};
|
|
455
456
|
|
|
456
457
|
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
|
|
457
458
|
static const yytype_uint8 yyrline[] =
|
|
458
459
|
{
|
|
459
460
|
0, 41, 41, 44, 45, 53, 64, 65, 68, 69,
|
|
460
|
-
70, 71, 72, 73, 74, 75,
|
|
461
|
+
70, 71, 72, 73, 74, 75, 76, 79, 82, 83,
|
|
462
|
+
86
|
|
461
463
|
};
|
|
462
464
|
#endif
|
|
463
465
|
|
|
@@ -466,7 +468,7 @@ static const yytype_uint8 yyrline[] =
|
|
|
466
468
|
First, the terminals, then, starting at YYNTOKENS, nonterminals. */
|
|
467
469
|
static const char *const yytname[] =
|
|
468
470
|
{
|
|
469
|
-
"$end", "error", "$undefined", "WORD", "OR", "':'", "'-'", "'~'",
|
|
471
|
+
"$end", "error", "$undefined", "WORD", "OR", "':'", "'-'", "'~'", "'*'",
|
|
470
472
|
"'\"'", "'('", "')'", "$accept", "result", "query", "disj", "atom",
|
|
471
473
|
"phrase", "words", "parens", 0
|
|
472
474
|
};
|
|
@@ -477,23 +479,25 @@ static const char *const yytname[] =
|
|
|
477
479
|
token YYLEX-NUM. */
|
|
478
480
|
static const yytype_uint16 yytoknum[] =
|
|
479
481
|
{
|
|
480
|
-
0, 256, 257, 258, 259, 58, 45, 126,
|
|
481
|
-
41
|
|
482
|
+
0, 256, 257, 258, 259, 58, 45, 126, 42, 34,
|
|
483
|
+
40, 41
|
|
482
484
|
};
|
|
483
485
|
# endif
|
|
484
486
|
|
|
485
487
|
/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
|
|
486
488
|
static const yytype_uint8 yyr1[] =
|
|
487
489
|
{
|
|
488
|
-
0,
|
|
489
|
-
|
|
490
|
+
0, 12, 13, 14, 14, 14, 15, 15, 16, 16,
|
|
491
|
+
16, 16, 16, 16, 16, 16, 16, 17, 18, 18,
|
|
492
|
+
19
|
|
490
493
|
};
|
|
491
494
|
|
|
492
495
|
/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
|
|
493
496
|
static const yytype_uint8 yyr2[] =
|
|
494
497
|
{
|
|
495
498
|
0, 2, 1, 0, 2, 2, 3, 3, 1, 1,
|
|
496
|
-
1, 3, 3, 3, 2, 2, 3, 1, 2,
|
|
499
|
+
1, 3, 3, 3, 2, 2, 1, 3, 1, 2,
|
|
500
|
+
3
|
|
497
501
|
};
|
|
498
502
|
|
|
499
503
|
/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
|
|
@@ -501,15 +505,15 @@ static const yytype_uint8 yyr2[] =
|
|
|
501
505
|
means the default is an error. */
|
|
502
506
|
static const yytype_uint8 yydefact[] =
|
|
503
507
|
{
|
|
504
|
-
3, 0, 2, 1, 8, 0, 0, 0, 3,
|
|
505
|
-
4, 10, 9, 0, 14, 15,
|
|
506
|
-
0, 11, 13, 12,
|
|
508
|
+
3, 0, 2, 1, 8, 0, 0, 16, 0, 3,
|
|
509
|
+
5, 4, 10, 9, 0, 14, 15, 18, 0, 0,
|
|
510
|
+
0, 0, 11, 13, 12, 19, 17, 20, 7, 6
|
|
507
511
|
};
|
|
508
512
|
|
|
509
513
|
/* YYDEFGOTO[NTERM-NUM]. */
|
|
510
514
|
static const yytype_int8 yydefgoto[] =
|
|
511
515
|
{
|
|
512
|
-
-1, 1, 2,
|
|
516
|
+
-1, 1, 2, 10, 11, 12, 18, 13
|
|
513
517
|
};
|
|
514
518
|
|
|
515
519
|
/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
|
|
@@ -517,15 +521,15 @@ static const yytype_int8 yydefgoto[] =
|
|
|
517
521
|
#define YYPACT_NINF -6
|
|
518
522
|
static const yytype_int8 yypact[] =
|
|
519
523
|
{
|
|
520
|
-
-6, 2,
|
|
521
|
-
|
|
522
|
-
|
|
524
|
+
-6, 2, 11, -6, 5, 11, 0, -6, 8, -6,
|
|
525
|
+
9, 22, -6, -6, 19, -6, -6, -6, 21, -2,
|
|
526
|
+
11, 11, -6, -6, -6, -6, -6, -6, -6, -6
|
|
523
527
|
};
|
|
524
528
|
|
|
525
529
|
/* YYPGOTO[NTERM-NUM]. */
|
|
526
530
|
static const yytype_int8 yypgoto[] =
|
|
527
531
|
{
|
|
528
|
-
-6, -6, 3, -6, -5,
|
|
532
|
+
-6, -6, 3, -6, -5, 13, -6, 17
|
|
529
533
|
};
|
|
530
534
|
|
|
531
535
|
/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
|
|
@@ -535,25 +539,27 @@ static const yytype_int8 yypgoto[] =
|
|
|
535
539
|
#define YYTABLE_NINF -1
|
|
536
540
|
static const yytype_uint8 yytable[] =
|
|
537
541
|
{
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
542
|
+
15, 4, 3, 16, 5, 6, 7, 8, 9, 27,
|
|
543
|
+
14, 17, 19, 20, 4, 28, 29, 5, 6, 7,
|
|
544
|
+
8, 9, 22, 0, 25, 0, 21, 23, 8, 9,
|
|
545
|
+
26, 24
|
|
541
546
|
};
|
|
542
547
|
|
|
543
548
|
static const yytype_int8 yycheck[] =
|
|
544
549
|
{
|
|
545
|
-
5, 3, 0, 3, 6, 7, 8, 9, 10,
|
|
546
|
-
3,
|
|
547
|
-
3, -1, 3, -1, 4,
|
|
550
|
+
5, 3, 0, 3, 6, 7, 8, 9, 10, 11,
|
|
551
|
+
5, 3, 9, 4, 3, 20, 21, 6, 7, 8,
|
|
552
|
+
9, 10, 3, -1, 3, -1, 4, 14, 9, 10,
|
|
553
|
+
9, 14
|
|
548
554
|
};
|
|
549
555
|
|
|
550
556
|
/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
|
|
551
557
|
symbol of state STATE-NUM. */
|
|
552
558
|
static const yytype_uint8 yystos[] =
|
|
553
559
|
{
|
|
554
|
-
0,
|
|
555
|
-
15, 16,
|
|
556
|
-
4, 3,
|
|
560
|
+
0, 13, 14, 0, 3, 6, 7, 8, 9, 10,
|
|
561
|
+
15, 16, 17, 19, 5, 16, 3, 3, 18, 14,
|
|
562
|
+
4, 4, 3, 17, 19, 3, 9, 11, 16, 16
|
|
557
563
|
};
|
|
558
564
|
|
|
559
565
|
#define yyerrok (yyerrstatus = 0)
|
|
@@ -1505,35 +1511,42 @@ yyreduce:
|
|
|
1505
1511
|
case 16:
|
|
1506
1512
|
|
|
1507
1513
|
/* Line 1455 of yacc.c */
|
|
1508
|
-
#line
|
|
1509
|
-
{ (yyval.query) = (
|
|
1514
|
+
#line 76 "query-parser.y"
|
|
1515
|
+
{ (yyval.query) = wp_query_new_every(); ;}
|
|
1510
1516
|
break;
|
|
1511
1517
|
|
|
1512
1518
|
case 17:
|
|
1513
1519
|
|
|
1514
1520
|
/* Line 1455 of yacc.c */
|
|
1515
|
-
#line
|
|
1516
|
-
{ (yyval.query) =
|
|
1521
|
+
#line 79 "query-parser.y"
|
|
1522
|
+
{ (yyval.query) = (yyvsp[(2) - (3)].query); ;}
|
|
1517
1523
|
break;
|
|
1518
1524
|
|
|
1519
1525
|
case 18:
|
|
1520
1526
|
|
|
1521
1527
|
/* Line 1455 of yacc.c */
|
|
1522
1528
|
#line 82 "query-parser.y"
|
|
1523
|
-
{ (yyval.query) =
|
|
1529
|
+
{ (yyval.query) = wp_query_new_phrase(); (yyval.query) = wp_query_add((yyval.query), wp_query_new_term(strdup(context->default_field), (yyvsp[(1) - (1)].string))); ;}
|
|
1524
1530
|
break;
|
|
1525
1531
|
|
|
1526
1532
|
case 19:
|
|
1527
1533
|
|
|
1528
1534
|
/* Line 1455 of yacc.c */
|
|
1529
|
-
#line
|
|
1535
|
+
#line 83 "query-parser.y"
|
|
1536
|
+
{ (yyval.query) = wp_query_add((yyvsp[(1) - (2)].query), wp_query_new_term(strdup(context->default_field), (yyvsp[(2) - (2)].string))); ;}
|
|
1537
|
+
break;
|
|
1538
|
+
|
|
1539
|
+
case 20:
|
|
1540
|
+
|
|
1541
|
+
/* Line 1455 of yacc.c */
|
|
1542
|
+
#line 86 "query-parser.y"
|
|
1530
1543
|
{ (yyval.query) = (yyvsp[(2) - (3)].query); ;}
|
|
1531
1544
|
break;
|
|
1532
1545
|
|
|
1533
1546
|
|
|
1534
1547
|
|
|
1535
1548
|
/* Line 1455 of yacc.c */
|
|
1536
|
-
#line
|
|
1549
|
+
#line 1550 "query-parser.tab.c"
|
|
1537
1550
|
default: break;
|
|
1538
1551
|
}
|
|
1539
1552
|
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
|
|
@@ -1752,6 +1765,6 @@ yyreturn:
|
|
|
1752
1765
|
|
|
1753
1766
|
|
|
1754
1767
|
/* Line 1675 of yacc.c */
|
|
1755
|
-
#line
|
|
1768
|
+
#line 89 "query-parser.y"
|
|
1756
1769
|
|
|
1757
1770
|
|
data/ext/whistlepig/query.c
CHANGED
|
@@ -53,35 +53,19 @@ wp_query* wp_query_new_label(const char* label) {
|
|
|
53
53
|
return ret;
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
-
|
|
57
|
-
wp_query*
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
wp_query* wp_query_new_disjunction() {
|
|
63
|
-
wp_query* ret = wp_query_new();
|
|
64
|
-
ret->type = WP_QUERY_DISJ;
|
|
65
|
-
return ret;
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
wp_query* wp_query_new_phrase() {
|
|
69
|
-
wp_query* ret = wp_query_new();
|
|
70
|
-
ret->type = WP_QUERY_PHRASE;
|
|
71
|
-
return ret;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
wp_query* wp_query_new_negation() {
|
|
75
|
-
wp_query* ret = wp_query_new();
|
|
76
|
-
ret->type = WP_QUERY_NEG;
|
|
77
|
-
return ret;
|
|
78
|
-
}
|
|
56
|
+
#define SIMPLE_QUERY_CONSTRUCTOR(name, type_name) \
|
|
57
|
+
wp_query* wp_query_new_##name() { \
|
|
58
|
+
wp_query* ret = wp_query_new(); \
|
|
59
|
+
ret->type = type_name; \
|
|
60
|
+
return ret; \
|
|
61
|
+
}
|
|
79
62
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
63
|
+
SIMPLE_QUERY_CONSTRUCTOR(conjunction, WP_QUERY_CONJ);
|
|
64
|
+
SIMPLE_QUERY_CONSTRUCTOR(disjunction, WP_QUERY_DISJ);
|
|
65
|
+
SIMPLE_QUERY_CONSTRUCTOR(phrase, WP_QUERY_PHRASE);
|
|
66
|
+
SIMPLE_QUERY_CONSTRUCTOR(negation, WP_QUERY_NEG);
|
|
67
|
+
SIMPLE_QUERY_CONSTRUCTOR(empty, WP_QUERY_EMPTY);
|
|
68
|
+
SIMPLE_QUERY_CONSTRUCTOR(every, WP_QUERY_EVERY);
|
|
85
69
|
|
|
86
70
|
wp_query* wp_query_add(wp_query* a, wp_query* b) {
|
|
87
71
|
if(a->type == WP_QUERY_EMPTY) {
|
|
@@ -129,22 +113,30 @@ static int subquery_to_s(wp_query* q, size_t n, char* buf) {
|
|
|
129
113
|
#define min(a, b) (a < b ? a : b)
|
|
130
114
|
|
|
131
115
|
size_t wp_query_to_s(wp_query* q, size_t n, char* buf) {
|
|
132
|
-
size_t ret;
|
|
116
|
+
size_t ret, term_n;
|
|
133
117
|
char* orig_buf = buf;
|
|
134
118
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
else if(q->type == WP_QUERY_TERM) {
|
|
140
|
-
size_t term_n = (size_t)snprintf(buf, n, "%s:\"%s\"", q->field, q->word);
|
|
119
|
+
/* nodes without children */
|
|
120
|
+
switch(q->type) {
|
|
121
|
+
case WP_QUERY_TERM:
|
|
122
|
+
term_n = (size_t)snprintf(buf, n, "%s:\"%s\"", q->field, q->word);
|
|
141
123
|
ret = min(term_n, n);
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
124
|
+
break;
|
|
125
|
+
case WP_QUERY_LABEL:
|
|
126
|
+
term_n = (size_t)snprintf(buf, n, "~%s", q->word);
|
|
145
127
|
ret = min(term_n, n);
|
|
146
|
-
|
|
147
|
-
|
|
128
|
+
break;
|
|
129
|
+
case WP_QUERY_EMPTY:
|
|
130
|
+
term_n = (size_t)snprintf(buf, n, "<EMPTY>");
|
|
131
|
+
ret = min(term_n, n);
|
|
132
|
+
break;
|
|
133
|
+
case WP_QUERY_EVERY:
|
|
134
|
+
term_n = (size_t)snprintf(buf, n, "<EVERY>");
|
|
135
|
+
ret = min(term_n, n);
|
|
136
|
+
break;
|
|
137
|
+
|
|
138
|
+
/* nodes with children */
|
|
139
|
+
default:
|
|
148
140
|
switch(q->type) {
|
|
149
141
|
case WP_QUERY_CONJ:
|
|
150
142
|
if(n >= 4) { // "(AND"
|
data/ext/whistlepig/query.h
CHANGED
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
#define WP_QUERY_NEG 5
|
|
22
22
|
#define WP_QUERY_LABEL 6
|
|
23
23
|
#define WP_QUERY_EMPTY 7
|
|
24
|
+
#define WP_QUERY_EVERY 8
|
|
24
25
|
|
|
25
26
|
// a node in the query tree
|
|
26
27
|
typedef struct wp_query {
|
|
@@ -60,6 +61,9 @@ wp_query* wp_query_new_negation();
|
|
|
60
61
|
// public: make an empty query node.
|
|
61
62
|
wp_query* wp_query_new_empty();
|
|
62
63
|
|
|
64
|
+
// public: make an every-document query node.
|
|
65
|
+
wp_query* wp_query_new_every();
|
|
66
|
+
|
|
63
67
|
// public: deep clone of a query, but dropping all search state.
|
|
64
68
|
wp_query* wp_query_clone(wp_query* other);
|
|
65
69
|
|
data/ext/whistlepig/search.c
CHANGED
|
@@ -74,8 +74,8 @@ RAISING_STATIC(search_result_combine_into(search_result* result, search_result*
|
|
|
74
74
|
* call to next() will give you the next result (or set done = true).
|
|
75
75
|
*
|
|
76
76
|
* advance() is given a docid and advances the stream to just *after* that
|
|
77
|
-
* document, and tells you whether it saw the docid on the way(and
|
|
78
|
-
* if so for your convenience).
|
|
77
|
+
* document, and tells you whether it saw the docid on the way (and sets the
|
|
78
|
+
* result if so for your convenience).
|
|
79
79
|
*
|
|
80
80
|
* a next() followed by one or more advance() calls with the returned docid
|
|
81
81
|
* will set found = true and will not advance the stream beyond where it
|
|
@@ -96,21 +96,25 @@ static wp_error* conj_init_search_state(wp_query* q, wp_segment* s) RAISES_ERROR
|
|
|
96
96
|
static wp_error* disj_init_search_state(wp_query* q, wp_segment* s) RAISES_ERROR;
|
|
97
97
|
static wp_error* phrase_init_search_state(wp_query* q, wp_segment* s) RAISES_ERROR;
|
|
98
98
|
static wp_error* neg_init_search_state(wp_query* q, wp_segment* s) RAISES_ERROR;
|
|
99
|
+
static wp_error* every_init_search_state(wp_query* q, wp_segment* s) RAISES_ERROR;
|
|
99
100
|
static wp_error* term_release_search_state(wp_query* q) RAISES_ERROR;
|
|
100
101
|
static wp_error* conj_release_search_state(wp_query* q) RAISES_ERROR;
|
|
101
102
|
static wp_error* disj_release_search_state(wp_query* q) RAISES_ERROR;
|
|
102
103
|
static wp_error* phrase_release_search_state(wp_query* q) RAISES_ERROR;
|
|
103
104
|
static wp_error* neg_release_search_state(wp_query* q) RAISES_ERROR;
|
|
105
|
+
static wp_error* every_release_search_state(wp_query* q) RAISES_ERROR;
|
|
104
106
|
static wp_error* term_next_doc(wp_query* q, wp_segment* s, search_result* result, int* done) RAISES_ERROR;
|
|
105
107
|
static wp_error* conj_next_doc(wp_query* q, wp_segment* s, search_result* result, int* done) RAISES_ERROR;
|
|
106
108
|
static wp_error* disj_next_doc(wp_query* q, wp_segment* s, search_result* result, int* done) RAISES_ERROR;
|
|
107
109
|
static wp_error* phrase_next_doc(wp_query* q, wp_segment* s, search_result* result, int* done) RAISES_ERROR;
|
|
108
110
|
static wp_error* neg_next_doc(wp_query* q, wp_segment* s, search_result* result, int* done) RAISES_ERROR;
|
|
111
|
+
static wp_error* every_next_doc(wp_query* q, wp_segment* s, search_result* result, int* done) RAISES_ERROR;
|
|
109
112
|
static wp_error* term_advance_to_doc(wp_query* q, wp_segment* s, docid_t doc_id, search_result* result, int* found, int* done) RAISES_ERROR;
|
|
110
113
|
static wp_error* conj_advance_to_doc(wp_query* q, wp_segment* s, docid_t doc_id, search_result* result, int* found, int* done) RAISES_ERROR;
|
|
111
114
|
static wp_error* disj_advance_to_doc(wp_query* q, wp_segment* s, docid_t doc_id, search_result* result, int* found, int* done) RAISES_ERROR;
|
|
112
115
|
static wp_error* phrase_advance_to_doc(wp_query* q, wp_segment* s, docid_t doc_id, search_result* result, int* found, int* done) RAISES_ERROR;
|
|
113
116
|
static wp_error* neg_advance_to_doc(wp_query* q, wp_segment* s, docid_t doc_id, search_result* result, int* found, int* done) RAISES_ERROR;
|
|
117
|
+
static wp_error* every_advance_to_doc(wp_query* q, wp_segment* s, docid_t doc_id, search_result* result, int* found, int* done) RAISES_ERROR;
|
|
114
118
|
|
|
115
119
|
// the term_* functions also handle labels
|
|
116
120
|
// we use conj for empty queries as well (why not)
|
|
@@ -123,6 +127,7 @@ static wp_error* neg_advance_to_doc(wp_query* q, wp_segment* s, docid_t doc_id,
|
|
|
123
127
|
case WP_QUERY_DISJ: RELAY_ERROR(disj_##suffix(__VA_ARGS__)); break; \
|
|
124
128
|
case WP_QUERY_PHRASE: RELAY_ERROR(phrase_##suffix(__VA_ARGS__)); break; \
|
|
125
129
|
case WP_QUERY_NEG: RELAY_ERROR(neg_##suffix(__VA_ARGS__)); break; \
|
|
130
|
+
case WP_QUERY_EVERY: RELAY_ERROR(every_##suffix(__VA_ARGS__)); break; \
|
|
126
131
|
default: RAISE_ERROR("unknown query node type %d", type); \
|
|
127
132
|
} \
|
|
128
133
|
|
|
@@ -286,6 +291,20 @@ static wp_error* neg_release_search_state(wp_query* q) {
|
|
|
286
291
|
return NO_ERROR;
|
|
287
292
|
}
|
|
288
293
|
|
|
294
|
+
static wp_error* every_init_search_state(wp_query* q, wp_segment* seg) {
|
|
295
|
+
q->search_data = malloc(sizeof(docid_t));
|
|
296
|
+
|
|
297
|
+
postings_region* pr = MMAP_OBJ(seg->postings, postings_region);
|
|
298
|
+
*(docid_t*)q->search_data = pr->num_docs;
|
|
299
|
+
|
|
300
|
+
return NO_ERROR;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
static wp_error* every_release_search_state(wp_query* q) {
|
|
304
|
+
free(q->search_data);
|
|
305
|
+
return NO_ERROR;
|
|
306
|
+
}
|
|
307
|
+
|
|
289
308
|
/********** search functions **********/
|
|
290
309
|
|
|
291
310
|
static wp_error* term_next_doc(wp_query* q, wp_segment* s, search_result* result, int* done) {
|
|
@@ -722,6 +741,46 @@ static wp_error* neg_advance_to_doc(wp_query* q, wp_segment* seg, docid_t doc_id
|
|
|
722
741
|
return NO_ERROR;
|
|
723
742
|
}
|
|
724
743
|
|
|
744
|
+
static wp_error* every_next_doc(wp_query* q, wp_segment* seg, search_result* result, int* done) {
|
|
745
|
+
(void)seg; // don't actually need to look in here!
|
|
746
|
+
docid_t* state_doc_id = (docid_t*)q->search_data;
|
|
747
|
+
|
|
748
|
+
DEBUG("called with cur %u", *state_doc_id);
|
|
749
|
+
|
|
750
|
+
if(*state_doc_id == DOCID_NONE) {
|
|
751
|
+
*done = 1;
|
|
752
|
+
}
|
|
753
|
+
else {
|
|
754
|
+
result->doc_id = *state_doc_id;
|
|
755
|
+
result->num_doc_matches = 0;
|
|
756
|
+
result->doc_matches = NULL;
|
|
757
|
+
(*state_doc_id)--;
|
|
758
|
+
*done = 0;
|
|
759
|
+
}
|
|
760
|
+
return NO_ERROR;
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
static wp_error* every_advance_to_doc(wp_query* q, wp_segment* seg, docid_t doc_id, search_result* result, int* found, int* done) {
|
|
764
|
+
(void)seg; // don't actually need to look in here!
|
|
765
|
+
docid_t* state_doc_id = q->search_data;
|
|
766
|
+
|
|
767
|
+
DEBUG("called with cur %u", *state_doc_id);
|
|
768
|
+
|
|
769
|
+
if(*state_doc_id == DOCID_NONE) {
|
|
770
|
+
*found = 0;
|
|
771
|
+
}
|
|
772
|
+
else {
|
|
773
|
+
*state_doc_id = doc_id - 1; // just after that doc
|
|
774
|
+
*found = 1; // we find everyhing
|
|
775
|
+
result->doc_id = doc_id;
|
|
776
|
+
result->num_doc_matches = 0;
|
|
777
|
+
result->doc_matches = NULL;
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
*done = (*state_doc_id == DOCID_NONE ? 1 : 0);
|
|
781
|
+
return NO_ERROR;
|
|
782
|
+
}
|
|
783
|
+
|
|
725
784
|
wp_error* wp_search_run_query_on_segment(struct wp_query* q, struct wp_segment* s, uint32_t max_num_results, uint32_t* num_results, search_result* results) {
|
|
726
785
|
int done;
|
|
727
786
|
|
|
@@ -506,8 +506,8 @@ static yyconst struct yy_trans_info yy_transition[3537] =
|
|
|
506
506
|
{ 41, 839 }, { 42, 839 }, { 43, 839 }, { 44, 839 }, { 45, 839 },
|
|
507
507
|
{ 46, 839 }, { 47, 839 }, { 48, 581 }, { 49, 581 }, { 50, 581 },
|
|
508
508
|
{ 51, 581 }, { 52, 581 }, { 53, 581 }, { 54, 581 }, { 55, 581 },
|
|
509
|
-
{ 56, 581 }, { 57, 581 }, { 58, 839 }, { 59, 839 }, {
|
|
510
|
-
{ 61, 839 }, {
|
|
509
|
+
{ 56, 581 }, { 57, 581 }, { 58, 839 }, { 59, 839 }, { 0, 0 },
|
|
510
|
+
{ 61, 839 }, { 0, 0 }, { 63, 839 }, { 64, 839 }, { 65, 581 },
|
|
511
511
|
{ 66, 581 }, { 67, 581 }, { 68, 581 }, { 69, 581 }, { 70, 581 },
|
|
512
512
|
{ 71, 581 }, { 72, 581 }, { 73, 581 }, { 74, 581 }, { 75, 581 },
|
|
513
513
|
{ 76, 581 }, { 77, 581 }, { 78, 581 }, { 79, 581 }, { 80, 581 },
|
|
@@ -578,7 +578,7 @@ static yyconst struct yy_trans_info yy_transition[3537] =
|
|
|
578
578
|
{ 48,1091 }, { 49,1091 }, { 50,1091 }, { 51,1091 }, { 52,1091 },
|
|
579
579
|
{ 53,1091 }, { 54,1091 }, { 55,1091 }, { 56,1091 }, { 57,1091 },
|
|
580
580
|
|
|
581
|
-
{ 58, 516 }, { 59, 516 }, {
|
|
581
|
+
{ 58, 516 }, { 59, 516 }, { 0, 0 }, { 61, 516 }, { 0, 0 },
|
|
582
582
|
{ 63, 516 }, { 64, 516 }, { 65, 258 }, { 66, 258 }, { 67, 258 },
|
|
583
583
|
{ 68, 258 }, { 69, 258 }, { 70, 258 }, { 71, 258 }, { 72, 258 },
|
|
584
584
|
{ 73, 258 }, { 74, 258 }, { 75, 258 }, { 76, 258 }, { 77, 258 },
|
|
@@ -635,7 +635,7 @@ static yyconst struct yy_trans_info yy_transition[3537] =
|
|
|
635
635
|
|
|
636
636
|
{ 50,1091 }, { 51,1091 }, { 52,1091 }, { 53,1091 }, { 54,1091 },
|
|
637
637
|
{ 55,1091 }, { 56,1091 }, { 57,1091 }, { 58, 258 }, { 59, 258 },
|
|
638
|
-
{
|
|
638
|
+
{ 0, 0 }, { 61, 258 }, { 0, 0 }, { 63, 258 }, { 64, 258 },
|
|
639
639
|
{ 65,1091 }, { 66,1091 }, { 67,1091 }, { 68,1091 }, { 69,1091 },
|
|
640
640
|
{ 70,1091 }, { 71,1091 }, { 72,1091 }, { 73,1091 }, { 74,1091 },
|
|
641
641
|
{ 75,1091 }, { 76,1091 }, { 77,1091 }, { 78,1091 }, { 79,1091 },
|
|
@@ -691,8 +691,8 @@ static yyconst struct yy_trans_info yy_transition[3537] =
|
|
|
691
691
|
{ 42, 0 }, { 43, 0 }, { 44, 0 }, { 45, 0 }, { 46, 0 },
|
|
692
692
|
{ 47, 0 }, { 48, 833 }, { 49, 833 }, { 50, 833 }, { 51, 833 },
|
|
693
693
|
{ 52, 833 }, { 53, 833 }, { 54, 833 }, { 55, 833 }, { 56, 833 },
|
|
694
|
-
{ 57, 833 }, { 58, 0 }, { 59, 0 }, {
|
|
695
|
-
{
|
|
694
|
+
{ 57, 833 }, { 58, 0 }, { 59, 0 }, { 0, 0 }, { 61, 0 },
|
|
695
|
+
{ 0, 0 }, { 63, 0 }, { 64, 0 }, { 65, 833 }, { 66, 833 },
|
|
696
696
|
{ 67, 833 }, { 68, 833 }, { 69, 833 }, { 70, 833 }, { 71, 833 },
|
|
697
697
|
{ 72, 833 }, { 73, 833 }, { 74, 833 }, { 75, 833 }, { 76, 833 },
|
|
698
698
|
{ 77, 833 }, { 78, 833 }, { 79, 833 }, { 80, 833 }, { 81, 833 },
|
|
@@ -761,7 +761,7 @@ static yyconst struct yy_trans_info yy_transition[3537] =
|
|
|
761
761
|
{ 45,-317 }, { 46,-317 }, { 47,-317 }, { 48, 833 }, { 49, 833 },
|
|
762
762
|
{ 50, 833 }, { 51, 833 }, { 52, 833 }, { 53, 833 }, { 54, 833 },
|
|
763
763
|
{ 55, 833 }, { 56, 833 }, { 57, 833 }, { 58,-317 }, { 59,-317 },
|
|
764
|
-
{
|
|
764
|
+
{ 0, 0 }, { 61,-317 }, { 0, 0 }, { 63,-317 }, { 64,-317 },
|
|
765
765
|
{ 65, 516 }, { 66, 516 }, { 67, 516 }, { 68, 516 }, { 69, 516 },
|
|
766
766
|
{ 70, 516 }, { 71, 516 }, { 72, 516 }, { 73, 516 }, { 74, 516 },
|
|
767
767
|
|
|
@@ -817,8 +817,8 @@ static yyconst struct yy_trans_info yy_transition[3537] =
|
|
|
817
817
|
{ 42,-575 }, { 43,-575 }, { 44,-575 }, { 45,-575 }, { 46,-258 },
|
|
818
818
|
{ 47,-575 }, { 48, 833 }, { 49, 833 }, { 50, 833 }, { 51, 833 },
|
|
819
819
|
{ 52, 833 }, { 53, 833 }, { 54, 833 }, { 55, 833 }, { 56, 833 },
|
|
820
|
-
{ 57, 833 }, { 58,-575 }, { 59,-575 }, {
|
|
821
|
-
{
|
|
820
|
+
{ 57, 833 }, { 58,-575 }, { 59,-575 }, { 0, 0 }, { 61,-575 },
|
|
821
|
+
{ 0, 0 }, { 63,-575 }, { 64,-575 }, { 65, 258 }, { 66, 258 },
|
|
822
822
|
|
|
823
823
|
{ 67, 258 }, { 68, 258 }, { 69, 258 }, { 70, 258 }, { 71, 258 },
|
|
824
824
|
{ 72, 258 }, { 73, 258 }, { 74, 258 }, { 75, 258 }, { 76, 258 },
|
|
@@ -875,7 +875,7 @@ static yyconst struct yy_trans_info yy_transition[3537] =
|
|
|
875
875
|
{ 49, 0 }, { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 },
|
|
876
876
|
{ 54, 0 }, { 55, 0 }, { 56, 0 }, { 57, 0 }, { 58,-833 },
|
|
877
877
|
|
|
878
|
-
{ 59,-833 }, {
|
|
878
|
+
{ 59,-833 }, { 0, 0 }, { 61,-833 }, { 0, 0 }, { 63,-833 },
|
|
879
879
|
{ 64,-833 }, { 65, 0 }, { 66, 0 }, { 67, 0 }, { 68, 0 },
|
|
880
880
|
{ 69, 0 }, { 70, 0 }, { 71, 0 }, { 72, 0 }, { 73, 0 },
|
|
881
881
|
{ 74, 0 }, { 75, 0 }, { 76, 0 }, { 77, 0 }, { 78, 0 },
|
|
@@ -944,8 +944,8 @@ static yyconst struct yy_trans_info yy_transition[3537] =
|
|
|
944
944
|
{ 42,-1150 }, { 43,-1150 }, { 44,-1150 }, { 45,-1150 }, { 46,-1150 },
|
|
945
945
|
{ 47,-1150 }, { 48, 0 }, { 49, 0 }, { 50, 0 }, { 51, 0 },
|
|
946
946
|
{ 52, 0 }, { 53, 0 }, { 54, 0 }, { 55, 0 }, { 56, 0 },
|
|
947
|
-
{ 57, 0 }, { 58,-1150 }, { 59,-1150 }, {
|
|
948
|
-
{
|
|
947
|
+
{ 57, 0 }, { 58,-1150 }, { 59,-1150 }, { 0, 0 }, { 61,-1150 },
|
|
948
|
+
{ 0, 0 }, { 63,-1150 }, { 64,-1150 }, { 65,-317 }, { 66,-317 },
|
|
949
949
|
{ 67,-317 }, { 68,-317 }, { 69,-317 }, { 70,-317 }, { 71,-317 },
|
|
950
950
|
{ 72,-317 }, { 73,-317 }, { 74,-317 }, { 75,-317 }, { 76,-317 },
|
|
951
951
|
{ 77,-317 }, { 78,-317 }, { 79,-317 }, { 80,-317 }, { 81,-317 },
|
|
@@ -1001,7 +1001,7 @@ static yyconst struct yy_trans_info yy_transition[3537] =
|
|
|
1001
1001
|
{ 44,-1408 }, { 45,-1408 }, { 46,-1091 }, { 47,-1408 }, { 48, 0 },
|
|
1002
1002
|
{ 49, 0 }, { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 },
|
|
1003
1003
|
{ 54, 0 }, { 55, 0 }, { 56, 0 }, { 57, 0 }, { 58,-1408 },
|
|
1004
|
-
{ 59,-1408 }, {
|
|
1004
|
+
{ 59,-1408 }, { 0, 0 }, { 61,-1408 }, { 0, 0 }, { 63,-1408 },
|
|
1005
1005
|
{ 64,-1408 }, { 65,-575 }, { 66,-575 }, { 67,-575 }, { 68,-575 },
|
|
1006
1006
|
{ 69,-575 }, { 70,-575 }, { 71,-575 }, { 72,-575 }, { 73,-575 },
|
|
1007
1007
|
{ 74,-575 }, { 75,-575 }, { 76,-575 }, { 77,-575 }, { 78,-575 },
|
metadata
CHANGED
|
@@ -1,33 +1,29 @@
|
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: whistlepig
|
|
3
|
-
version: !ruby/object:Gem::Version
|
|
4
|
-
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: '0.7'
|
|
5
5
|
prerelease:
|
|
6
|
-
segments:
|
|
7
|
-
- 0
|
|
8
|
-
- 6
|
|
9
|
-
version: "0.6"
|
|
10
6
|
platform: ruby
|
|
11
|
-
authors:
|
|
7
|
+
authors:
|
|
12
8
|
- William Morgan
|
|
13
9
|
autorequire:
|
|
14
10
|
bindir: bin
|
|
15
11
|
cert_chain: []
|
|
16
|
-
|
|
17
|
-
date: 2011-04-28 21:39:58 -07:00
|
|
12
|
+
date: 2011-06-19 13:10:35.000000000 -07:00
|
|
18
13
|
default_executable:
|
|
19
14
|
dependencies: []
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
description: Whistlepig is a minimalist realtime full-text search index. Its goal
|
|
16
|
+
is to be as small and minimally-featured as possible, while still remaining useful,
|
|
17
|
+
performant and scalable to large corpora. If you want realtime full-text search
|
|
18
|
+
without the frills, Whistlepig may be for you.
|
|
22
19
|
email: wmorgan-whistlepig-gemspec@masanjin.net
|
|
23
20
|
executables: []
|
|
24
|
-
|
|
25
|
-
extensions:
|
|
21
|
+
extensions:
|
|
26
22
|
- ext/whistlepig/extconf.rb
|
|
27
|
-
extra_rdoc_files:
|
|
23
|
+
extra_rdoc_files:
|
|
28
24
|
- README
|
|
29
25
|
- ext/whistlepig/whistlepigc.c
|
|
30
|
-
files:
|
|
26
|
+
files:
|
|
31
27
|
- README
|
|
32
28
|
- ext/whistlepig/extconf.rb
|
|
33
29
|
- lib/whistlepig.rb
|
|
@@ -67,41 +63,32 @@ files:
|
|
|
67
63
|
has_rdoc: true
|
|
68
64
|
homepage: http://masanjin.net/whistlepig
|
|
69
65
|
licenses: []
|
|
70
|
-
|
|
71
66
|
post_install_message:
|
|
72
|
-
rdoc_options:
|
|
67
|
+
rdoc_options:
|
|
73
68
|
- -c
|
|
74
69
|
- utf8
|
|
75
70
|
- --main
|
|
76
71
|
- README
|
|
77
72
|
- --title
|
|
78
73
|
- Whistlepig
|
|
79
|
-
require_paths:
|
|
74
|
+
require_paths:
|
|
80
75
|
- lib
|
|
81
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
|
76
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
82
77
|
none: false
|
|
83
|
-
requirements:
|
|
84
|
-
- -
|
|
85
|
-
- !ruby/object:Gem::Version
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
- 0
|
|
89
|
-
version: "0"
|
|
90
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - ! '>='
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: '0'
|
|
82
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
91
83
|
none: false
|
|
92
|
-
requirements:
|
|
93
|
-
- -
|
|
94
|
-
- !ruby/object:Gem::Version
|
|
95
|
-
|
|
96
|
-
segments:
|
|
97
|
-
- 0
|
|
98
|
-
version: "0"
|
|
84
|
+
requirements:
|
|
85
|
+
- - ! '>='
|
|
86
|
+
- !ruby/object:Gem::Version
|
|
87
|
+
version: '0'
|
|
99
88
|
requirements: []
|
|
100
|
-
|
|
101
89
|
rubyforge_project:
|
|
102
90
|
rubygems_version: 1.6.0
|
|
103
91
|
signing_key:
|
|
104
92
|
specification_version: 3
|
|
105
93
|
summary: a minimalist realtime full-text search index
|
|
106
94
|
test_files: []
|
|
107
|
-
|