ferret 0.10.4 → 0.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -569,7 +569,7 @@ static TokenStream *
569
569
  frt_get_cwrapped_rts(VALUE rts)
570
570
  {
571
571
  TokenStream *ts;
572
- if (rb_ivar_get(CLASS_OF(rts), id_cclass) == Qtrue) {
572
+ if (frt_is_cclass(rts) && DATA_PTR(rts)) {
573
573
  GET_TS(ts, rts);
574
574
  REF(ts);
575
575
  }
@@ -972,7 +972,7 @@ frt_stop_filter_init(int argc, VALUE *argv, VALUE self)
972
972
  * StemFilter.new(token_stream) -> token_stream
973
973
  * StemFilter.new(token_stream,
974
974
  * algorithm="english",
975
- * encoding=locale-specific) -> token_stream
975
+ * encoding="UTF-8") -> token_stream
976
976
  *
977
977
  * Create a StemFilter which uses a snowball stemmer (thank you, Martin
978
978
  * Porter) to stem words. You can optionally specify the algorithm (default:
@@ -1034,7 +1034,7 @@ static TokenStream *
1034
1034
  cwa_get_ts(Analyzer *a, char *field, char *text)
1035
1035
  {
1036
1036
  VALUE rts = rb_funcall(CWA(a)->ranalyzer, id_token_stream, 2,
1037
- rb_str_new2(field), rb_str_new2(text));
1037
+ rb_intern(field), rb_str_new2(text));
1038
1038
  return frt_get_cwrapped_rts(rts);
1039
1039
  }
1040
1040
 
@@ -1042,7 +1042,7 @@ Analyzer *
1042
1042
  frt_get_cwrapped_analyzer(VALUE ranalyzer)
1043
1043
  {
1044
1044
  Analyzer *a = NULL;
1045
- if (rb_ivar_get(CLASS_OF(ranalyzer), id_cclass) == Qtrue) {
1045
+ if (frt_is_cclass(ranalyzer) && DATA_PTR(ranalyzer)) {
1046
1046
  Data_Get_Struct(ranalyzer, Analyzer, a);
1047
1047
  REF(a);
1048
1048
  }
@@ -1230,7 +1230,7 @@ frt_a_standard_analyzer_init(int argc, VALUE *argv, VALUE self)
1230
1230
  bool lower;
1231
1231
  VALUE rlower, rstop_words;
1232
1232
  Analyzer *a;
1233
- rb_scan_args(argc, argv, "02", &rlower, &rstop_words);
1233
+ rb_scan_args(argc, argv, "02", &rstop_words, &rlower);
1234
1234
  lower = ((rlower == Qnil) ? true : RTEST(rlower));
1235
1235
  if (rstop_words != Qnil) {
1236
1236
  char **stop_words = get_stopwords(rstop_words);
@@ -1246,7 +1246,7 @@ frt_a_standard_analyzer_init(int argc, VALUE *argv, VALUE self)
1246
1246
 
1247
1247
  /*
1248
1248
  * call-seq:
1249
- * StandardAnalyzer.new(lower = true, stop_words = ENGLISH_STOP_WORDS)
1249
+ * StandardAnalyzer.new(stop_words=ENGLISH_STOP_WORDS, lower=true)
1250
1250
  * -> analyzer
1251
1251
  *
1252
1252
  * Create a new StandardAnalyzer which downcases tokens by default but can
@@ -1330,7 +1330,7 @@ frt_per_field_analyzer_add_field(VALUE self, VALUE rfield, VALUE ranalyzer)
1330
1330
  Data_Get_Struct(self, Analyzer, pfa);
1331
1331
  a = frt_get_cwrapped_analyzer(ranalyzer);
1332
1332
 
1333
- pfa_add_field(pfa, StringValuePtr(rfield), a);
1333
+ pfa_add_field(pfa, frt_field(rfield), a);
1334
1334
  return self;
1335
1335
  }
1336
1336
 
@@ -1483,7 +1483,8 @@ static void Init_Token(void)
1483
1483
  */
1484
1484
  static void Init_TokenStream(void)
1485
1485
  {
1486
- cTokenStream = frt_define_class_under(mAnalysis, "TokenStream", rb_cObject);
1486
+ cTokenStream = rb_define_class_under(mAnalysis, "TokenStream", rb_cObject);
1487
+ frt_mark_cclass(cTokenStream);
1487
1488
  rb_define_method(cTokenStream, "next", frt_ts_next, 0);
1488
1489
  rb_define_method(cTokenStream, "text=", frt_ts_set_text, 1);
1489
1490
  rb_define_method(cTokenStream, "text", frt_ts_get_text, 0);
@@ -1504,7 +1505,8 @@ static void Init_TokenStream(void)
1504
1505
  static void Init_AsciiLetterTokenizer(void)
1505
1506
  {
1506
1507
  cAsciiLetterTokenizer =
1507
- frt_define_class_under(mAnalysis, "AsciiLetterTokenizer", cTokenStream);
1508
+ rb_define_class_under(mAnalysis, "AsciiLetterTokenizer", cTokenStream);
1509
+ frt_mark_cclass(cAsciiLetterTokenizer);
1508
1510
  rb_define_alloc_func(cAsciiLetterTokenizer, frt_data_alloc);
1509
1511
  rb_define_method(cAsciiLetterTokenizer, "initialize",
1510
1512
  frt_a_letter_tokenizer_init, 1);
@@ -1526,7 +1528,8 @@ static void Init_AsciiLetterTokenizer(void)
1526
1528
  static void Init_LetterTokenizer(void)
1527
1529
  {
1528
1530
  cLetterTokenizer =
1529
- frt_define_class_under(mAnalysis, "LetterTokenizer", cTokenStream);
1531
+ rb_define_class_under(mAnalysis, "LetterTokenizer", cTokenStream);
1532
+ frt_mark_cclass(cLetterTokenizer);
1530
1533
  rb_define_alloc_func(cLetterTokenizer, frt_data_alloc);
1531
1534
  rb_define_method(cLetterTokenizer, "initialize",
1532
1535
  frt_letter_tokenizer_init, -1);
@@ -1546,8 +1549,9 @@ static void Init_LetterTokenizer(void)
1546
1549
  static void Init_AsciiWhiteSpaceTokenizer(void)
1547
1550
  {
1548
1551
  cAsciiWhiteSpaceTokenizer =
1549
- frt_define_class_under(mAnalysis, "AsciiWhiteSpaceTokenizer",
1552
+ rb_define_class_under(mAnalysis, "AsciiWhiteSpaceTokenizer",
1550
1553
  cTokenStream);
1554
+ frt_mark_cclass(cAsciiWhiteSpaceTokenizer);
1551
1555
  rb_define_alloc_func(cAsciiWhiteSpaceTokenizer, frt_data_alloc);
1552
1556
  rb_define_method(cAsciiWhiteSpaceTokenizer, "initialize",
1553
1557
  frt_a_whitespace_tokenizer_init, 1);
@@ -1567,7 +1571,8 @@ static void Init_AsciiWhiteSpaceTokenizer(void)
1567
1571
  static void Init_WhiteSpaceTokenizer(void)
1568
1572
  {
1569
1573
  cWhiteSpaceTokenizer =
1570
- frt_define_class_under(mAnalysis, "WhiteSpaceTokenizer", cTokenStream);
1574
+ rb_define_class_under(mAnalysis, "WhiteSpaceTokenizer", cTokenStream);
1575
+ frt_mark_cclass(cWhiteSpaceTokenizer);
1571
1576
  rb_define_alloc_func(cWhiteSpaceTokenizer, frt_data_alloc);
1572
1577
  rb_define_method(cWhiteSpaceTokenizer, "initialize",
1573
1578
  frt_whitespace_tokenizer_init, -1);
@@ -1588,7 +1593,8 @@ static void Init_WhiteSpaceTokenizer(void)
1588
1593
  static void Init_AsciiStandardTokenizer(void)
1589
1594
  {
1590
1595
  cAsciiStandardTokenizer =
1591
- frt_define_class_under(mAnalysis, "AsciiStandardTokenizer", cTokenStream);
1596
+ rb_define_class_under(mAnalysis, "AsciiStandardTokenizer", cTokenStream);
1597
+ frt_mark_cclass(cAsciiStandardTokenizer);
1592
1598
  rb_define_alloc_func(cAsciiStandardTokenizer, frt_data_alloc);
1593
1599
  rb_define_method(cAsciiStandardTokenizer, "initialize",
1594
1600
  frt_a_standard_tokenizer_init, 1);
@@ -1609,7 +1615,8 @@ static void Init_AsciiStandardTokenizer(void)
1609
1615
  static void Init_StandardTokenizer(void)
1610
1616
  {
1611
1617
  cStandardTokenizer =
1612
- frt_define_class_under(mAnalysis, "StandardTokenizer", cTokenStream);
1618
+ rb_define_class_under(mAnalysis, "StandardTokenizer", cTokenStream);
1619
+ frt_mark_cclass(cStandardTokenizer);
1613
1620
  rb_define_alloc_func(cStandardTokenizer, frt_data_alloc);
1614
1621
  rb_define_method(cStandardTokenizer, "initialize",
1615
1622
  frt_standard_tokenizer_init, 1);
@@ -1636,7 +1643,8 @@ static void Init_StandardTokenizer(void)
1636
1643
  static void Init_RegExpTokenizer(void)
1637
1644
  {
1638
1645
  cRegExpTokenizer =
1639
- frt_define_class_under(mAnalysis, "RegExpTokenizer", cTokenStream);
1646
+ rb_define_class_under(mAnalysis, "RegExpTokenizer", cTokenStream);
1647
+ frt_mark_cclass(cRegExpTokenizer);
1640
1648
  rtoken_re = rb_reg_new(TOKEN_RE, strlen(TOKEN_RE), 0);
1641
1649
  rb_define_const(cRegExpTokenizer, "REGEXP", rtoken_re);
1642
1650
  rb_define_alloc_func(cRegExpTokenizer, frt_data_alloc);
@@ -1664,7 +1672,8 @@ static void Init_RegExpTokenizer(void)
1664
1672
  static void Init_AsciiLowerCaseFilter(void)
1665
1673
  {
1666
1674
  cAsciiLowerCaseFilter =
1667
- frt_define_class_under(mAnalysis, "AsciiLowerCaseFilter", cTokenStream);
1675
+ rb_define_class_under(mAnalysis, "AsciiLowerCaseFilter", cTokenStream);
1676
+ frt_mark_cclass(cAsciiLowerCaseFilter);
1668
1677
  rb_define_alloc_func(cAsciiLowerCaseFilter, frt_data_alloc);
1669
1678
  rb_define_method(cAsciiLowerCaseFilter, "initialize",
1670
1679
  frt_a_lowercase_filter_init, 1);
@@ -1684,7 +1693,8 @@ static void Init_AsciiLowerCaseFilter(void)
1684
1693
  static void Init_LowerCaseFilter(void)
1685
1694
  {
1686
1695
  cLowerCaseFilter =
1687
- frt_define_class_under(mAnalysis, "LowerCaseFilter", cTokenStream);
1696
+ rb_define_class_under(mAnalysis, "LowerCaseFilter", cTokenStream);
1697
+ frt_mark_cclass(cLowerCaseFilter);
1688
1698
  rb_define_alloc_func(cLowerCaseFilter, frt_data_alloc);
1689
1699
  rb_define_method(cLowerCaseFilter, "initialize",
1690
1700
  frt_lowercase_filter_init, 1);
@@ -1706,7 +1716,8 @@ static void Init_LowerCaseFilter(void)
1706
1716
  static void Init_HyphenFilter(void)
1707
1717
  {
1708
1718
  cHyphenFilter =
1709
- frt_define_class_under(mAnalysis, "HyphenFilter", cTokenStream);
1719
+ rb_define_class_under(mAnalysis, "HyphenFilter", cTokenStream);
1720
+ frt_mark_cclass(cHyphenFilter);
1710
1721
  rb_define_alloc_func(cHyphenFilter, frt_data_alloc);
1711
1722
  rb_define_method(cHyphenFilter, "initialize", frt_hyphen_filter_init, 1);
1712
1723
  }
@@ -1725,7 +1736,8 @@ static void Init_HyphenFilter(void)
1725
1736
  static void Init_StopFilter(void)
1726
1737
  {
1727
1738
  cStopFilter =
1728
- frt_define_class_under(mAnalysis, "StopFilter", cTokenStream);
1739
+ rb_define_class_under(mAnalysis, "StopFilter", cTokenStream);
1740
+ frt_mark_cclass(cStopFilter);
1729
1741
  rb_define_alloc_func(cStopFilter, frt_data_alloc);
1730
1742
  rb_define_method(cStopFilter, "initialize",
1731
1743
  frt_stop_filter_init, -1);
@@ -1738,14 +1750,10 @@ static void Init_StopFilter(void)
1738
1750
  *
1739
1751
  * A StemFilter takes a term and transforms the term as per the SnowBall
1740
1752
  * stemming algorithm. Note: the input to the stemming filter must already
1741
- * be in lower case, so you will need to use LowerCaseFilter or
1742
- * LowerCaseTokenizer further down the Tokenizer chain in order for this to
1743
- * work properly!
1753
+ * be in lower case, so you will need to use LowerCaseFilter or a lowercasing
1754
+ * Tokenizer further down the Tokenizer chain in order for this to work
1755
+ * properly!
1744
1756
  *
1745
- * To use this filter with other analyzers, you'll want to write an Analyzer
1746
- * class that sets up the TokenStream chain as you want it. To use this with
1747
- * LowerCaseTokenizer, for example, you'd write an analyzer like this:
1748
- *
1749
1757
  * === Available algorithms and encodings
1750
1758
  *
1751
1759
  * Algorithm Algorithm Pseudonyms Encoding
@@ -1766,6 +1774,10 @@ static void Init_StopFilter(void)
1766
1774
  *
1767
1775
  * === Example
1768
1776
  *
1777
+ * To use this filter with other analyzers, you'll want to write an Analyzer
1778
+ * class that sets up the TokenStream chain as you want it. To use this with
1779
+ * a lowercasing Tokenizer, for example, you'd write an analyzer like this:
1780
+ *
1769
1781
  * class MyAnalyzer < Analyzer
1770
1782
  * def token_stream(field, str)
1771
1783
  * return StemFilter.new(LowerCaseFilter.new(StandardTokenizer.new(str)))
@@ -1784,7 +1796,8 @@ static void Init_StopFilter(void)
1784
1796
  static void Init_StemFilter(void)
1785
1797
  {
1786
1798
  cStemFilter =
1787
- frt_define_class_under(mAnalysis, "StemFilter", cTokenStream);
1799
+ rb_define_class_under(mAnalysis, "StemFilter", cTokenStream);
1800
+ frt_mark_cclass(cStemFilter);
1788
1801
  rb_define_alloc_func(cStemFilter, frt_data_alloc);
1789
1802
  rb_define_method(cStemFilter, "initialize",
1790
1803
  frt_stem_filter_init, -1);
@@ -1827,7 +1840,8 @@ static void Init_StemFilter(void)
1827
1840
  static void Init_Analyzer(void)
1828
1841
  {
1829
1842
  cAnalyzer =
1830
- frt_define_class_under(mAnalysis, "Analyzer", rb_cObject);
1843
+ rb_define_class_under(mAnalysis, "Analyzer", rb_cObject);
1844
+ frt_mark_cclass(cAnalyzer);
1831
1845
  rb_define_alloc_func(cAnalyzer, frt_data_alloc);
1832
1846
  rb_define_method(cAnalyzer, "initialize", frt_letter_analyzer_init, -1);
1833
1847
  rb_define_method(cAnalyzer, "token_stream", frt_analyzer_token_stream, 2);
@@ -1864,7 +1878,8 @@ static void Init_Analyzer(void)
1864
1878
  static void Init_AsciiLetterAnalyzer(void)
1865
1879
  {
1866
1880
  cAsciiLetterAnalyzer =
1867
- frt_define_class_under(mAnalysis, "AsciiLetterAnalyzer", cAnalyzer);
1881
+ rb_define_class_under(mAnalysis, "AsciiLetterAnalyzer", cAnalyzer);
1882
+ frt_mark_cclass(cAsciiLetterAnalyzer);
1868
1883
  rb_define_alloc_func(cAsciiLetterAnalyzer, frt_data_alloc);
1869
1884
  rb_define_method(cAsciiLetterAnalyzer, "initialize",
1870
1885
  frt_a_letter_analyzer_init, -1);
@@ -1894,7 +1909,8 @@ static void Init_AsciiLetterAnalyzer(void)
1894
1909
  static void Init_LetterAnalyzer(void)
1895
1910
  {
1896
1911
  cLetterAnalyzer =
1897
- frt_define_class_under(mAnalysis, "LetterAnalyzer", cAnalyzer);
1912
+ rb_define_class_under(mAnalysis, "LetterAnalyzer", cAnalyzer);
1913
+ frt_mark_cclass(cLetterAnalyzer);
1898
1914
  rb_define_alloc_func(cLetterAnalyzer, frt_data_alloc);
1899
1915
  rb_define_method(cLetterAnalyzer, "initialize",
1900
1916
  frt_letter_analyzer_init, -1);
@@ -1930,7 +1946,8 @@ static void Init_LetterAnalyzer(void)
1930
1946
  static void Init_AsciiWhiteSpaceAnalyzer(void)
1931
1947
  {
1932
1948
  cAsciiWhiteSpaceAnalyzer =
1933
- frt_define_class_under(mAnalysis, "AsciiWhiteSpaceAnalyzer", cAnalyzer);
1949
+ rb_define_class_under(mAnalysis, "AsciiWhiteSpaceAnalyzer", cAnalyzer);
1950
+ frt_mark_cclass(cAsciiWhiteSpaceAnalyzer);
1934
1951
  rb_define_alloc_func(cAsciiWhiteSpaceAnalyzer, frt_data_alloc);
1935
1952
  rb_define_method(cAsciiWhiteSpaceAnalyzer, "initialize",
1936
1953
  frt_a_white_space_analyzer_init, -1);
@@ -1960,7 +1977,8 @@ static void Init_AsciiWhiteSpaceAnalyzer(void)
1960
1977
  static void Init_WhiteSpaceAnalyzer(void)
1961
1978
  {
1962
1979
  cWhiteSpaceAnalyzer =
1963
- frt_define_class_under(mAnalysis, "WhiteSpaceAnalyzer", cAnalyzer);
1980
+ rb_define_class_under(mAnalysis, "WhiteSpaceAnalyzer", cAnalyzer);
1981
+ frt_mark_cclass(cWhiteSpaceAnalyzer);
1964
1982
  rb_define_alloc_func(cWhiteSpaceAnalyzer, frt_data_alloc);
1965
1983
  rb_define_method(cWhiteSpaceAnalyzer, "initialize",
1966
1984
  frt_white_space_analyzer_init, -1);
@@ -1975,18 +1993,16 @@ static void Init_WhiteSpaceAnalyzer(void)
1975
1993
  * ascii-analyzers. If it were implemented in Ruby it would look like this;
1976
1994
  *
1977
1995
  * class AsciiStandardAnalyzer
1978
- * def initialize(lower = true, stop_words = ENGLISH_STOP_WORDS)
1996
+ * def initialize(stop_words = ENGLISH_STOP_WORDS, lower = true)
1979
1997
  * @lower = lower
1980
1998
  * @stop_words = stop_words
1981
1999
  * end
1982
2000
  *
1983
2001
  * def token_stream(field, str)
1984
- * if @lower
1985
- * return StopFilter.new(AsciiLowerCaseFilter.new(
1986
- * AsciiStandardTokenizer.new(str)), @stop_words)
1987
- * else
1988
- * return StopFilter.new(AsciiStandardTokenizer.new(str), @stop_words)
1989
- * end
2002
+ * ts = AsciiStandardTokenizer.new(str)
2003
+ * ts = AsciiLowerCaseFilter.new(ts) if @lower
2004
+ * ts = StopFilter.new(ts, @stop_words)
2005
+ * ts = HyphenFilter.new(ts)
1990
2006
  * end
1991
2007
  * end
1992
2008
  *
@@ -1998,7 +2014,8 @@ static void Init_WhiteSpaceAnalyzer(void)
1998
2014
  static void Init_AsciiStandardAnalyzer(void)
1999
2015
  {
2000
2016
  cAsciiStandardAnalyzer =
2001
- frt_define_class_under(mAnalysis, "AsciiStandardAnalyzer", cAnalyzer);
2017
+ rb_define_class_under(mAnalysis, "AsciiStandardAnalyzer", cAnalyzer);
2018
+ frt_mark_cclass(cAsciiStandardAnalyzer);
2002
2019
  rb_define_alloc_func(cAsciiStandardAnalyzer, frt_data_alloc);
2003
2020
  rb_define_method(cAsciiStandardAnalyzer, "initialize",
2004
2021
  frt_a_standard_analyzer_init, -1);
@@ -2013,13 +2030,16 @@ static void Init_AsciiStandardAnalyzer(void)
2013
2030
  * it were implemented in Ruby it would look like this;
2014
2031
  *
2015
2032
  * class StandardAnalyzer
2016
- * def initialize(lower = true, stop_words = ENGLISH_STOP_WORDS)
2033
+ * def initialize(stop_words = ENGLISH_STOP_WORDS, lower = true)
2017
2034
  * @lower = lower
2018
2035
  * @stop_words = stop_words
2019
2036
  * end
2020
2037
  *
2021
2038
  * def token_stream(field, str)
2022
- * return StopFilter.new(StandardTokenizer.new(str, @lower), @stop_words)
2039
+ * ts = StandardTokenizer.new(str)
2040
+ * ts = LowerCaseFilter.new(ts) if @lower
2041
+ * ts = StopFilter.new(ts, @stop_words)
2042
+ * ts = HyphenFilter.new(ts)
2023
2043
  * end
2024
2044
  * end
2025
2045
  *
@@ -2029,7 +2049,8 @@ static void Init_AsciiStandardAnalyzer(void)
2029
2049
  static void Init_StandardAnalyzer(void)
2030
2050
  {
2031
2051
  cStandardAnalyzer =
2032
- frt_define_class_under(mAnalysis, "StandardAnalyzer", cAnalyzer);
2052
+ rb_define_class_under(mAnalysis, "StandardAnalyzer", cAnalyzer);
2053
+ frt_mark_cclass(cStandardAnalyzer);
2033
2054
  rb_define_alloc_func(cStandardAnalyzer, frt_data_alloc);
2034
2055
  rb_define_method(cStandardAnalyzer, "initialize",
2035
2056
  frt_standard_analyzer_init, -1);
@@ -2058,7 +2079,8 @@ static void Init_StandardAnalyzer(void)
2058
2079
  static void Init_PerFieldAnalyzer(void)
2059
2080
  {
2060
2081
  cPerFieldAnalyzer =
2061
- frt_define_class_under(mAnalysis, "PerFieldAnalyzer", cAnalyzer);
2082
+ rb_define_class_under(mAnalysis, "PerFieldAnalyzer", cAnalyzer);
2083
+ frt_mark_cclass(cPerFieldAnalyzer);
2062
2084
  rb_define_alloc_func(cPerFieldAnalyzer, frt_data_alloc);
2063
2085
  rb_define_method(cPerFieldAnalyzer, "initialize",
2064
2086
  frt_per_field_analyzer_init, 1);
@@ -2098,7 +2120,8 @@ static void Init_PerFieldAnalyzer(void)
2098
2120
  static void Init_RegExpAnalyzer(void)
2099
2121
  {
2100
2122
  cRegExpAnalyzer =
2101
- frt_define_class_under(mAnalysis, "RegExpAnalyzer", cAnalyzer);
2123
+ rb_define_class_under(mAnalysis, "RegExpAnalyzer", cAnalyzer);
2124
+ frt_mark_cclass(cRegExpAnalyzer);
2102
2125
  rb_define_alloc_func(cRegExpAnalyzer, frt_data_alloc);
2103
2126
  rb_define_method(cRegExpAnalyzer, "initialize",
2104
2127
  frt_re_analyzer_init, -1);
@@ -94,10 +94,12 @@ frt_fi_get_params(VALUE roptions,
94
94
  if (Qnil != v) Check_Type(v, T_SYMBOL);
95
95
  if (v == sym_no || v == sym_false || v == Qfalse) {
96
96
  *store = STORE_NO;
97
- } else if (v == sym_yes || v == sym_true || v == Qtrue || v == Qnil) {
97
+ } else if (v == sym_yes || v == sym_true || v == Qtrue) {
98
98
  *store = STORE_YES;
99
99
  } else if (v == sym_compress || v == sym_compressed) {
100
100
  *store = STORE_COMPRESS;
101
+ } else if (v == Qnil) {
102
+ /* leave as default */
101
103
  } else {
102
104
  rb_raise(rb_eArgError, ":%s isn't a valid argument for :store."
103
105
  " Please choose from [:yes, :no, :compressed]",
@@ -108,7 +110,7 @@ frt_fi_get_params(VALUE roptions,
108
110
  if (Qnil != v) Check_Type(v, T_SYMBOL);
109
111
  if (v == sym_no || v == sym_false || v == Qfalse) {
110
112
  *index = INDEX_NO;
111
- } else if (v == sym_yes || v == sym_true || v == Qtrue || v == Qnil) {
113
+ } else if (v == sym_yes || v == sym_true || v == Qtrue) {
112
114
  *index = INDEX_YES;
113
115
  } else if (v == sym_untokenized) {
114
116
  *index = INDEX_UNTOKENIZED;
@@ -116,6 +118,8 @@ frt_fi_get_params(VALUE roptions,
116
118
  *index = INDEX_YES_OMIT_NORMS;
117
119
  } else if (v == sym_untokenized_omit_norms) {
118
120
  *index = INDEX_UNTOKENIZED_OMIT_NORMS;
121
+ } else if (v == Qnil) {
122
+ /* leave as default */
119
123
  } else {
120
124
  rb_raise(rb_eArgError, ":%s isn't a valid argument for :index."
121
125
  " Please choose from [:no, :yes, :untokenized, "
@@ -133,8 +137,10 @@ frt_fi_get_params(VALUE roptions,
133
137
  *term_vector = TERM_VECTOR_WITH_POSITIONS;
134
138
  } else if (v == sym_with_offsets) {
135
139
  *term_vector = TERM_VECTOR_WITH_OFFSETS;
136
- } else if (v == sym_with_positions_offsets || v == Qnil) {
140
+ } else if (v == sym_with_positions_offsets) {
137
141
  *term_vector = TERM_VECTOR_WITH_POSITIONS_OFFSETS;
142
+ } else if (v == Qnil) {
143
+ /* leave as default */
138
144
  } else {
139
145
  rb_raise(rb_eArgError, ":%s isn't a valid argument for "
140
146
  ":term_vector. Please choose from [:no, :yes, "
@@ -507,9 +513,9 @@ frt_fis_add_field(int argc, VALUE *argv, VALUE self)
507
513
  {
508
514
  FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
509
515
  FieldInfo *fi;
510
- enum StoreValues store = STORE_YES;
511
- enum IndexValues index = INDEX_YES;
512
- enum TermVectorValues term_vector = TERM_VECTOR_WITH_POSITIONS_OFFSETS;
516
+ enum StoreValues store = fis->store;
517
+ enum IndexValues index = fis->index;
518
+ enum TermVectorValues term_vector = fis->term_vector;
513
519
  float boost = 1.0f;
514
520
  VALUE rname, roptions;
515
521
 
@@ -2134,6 +2140,21 @@ frt_ir_undelete_all(VALUE self)
2134
2140
  return self;
2135
2141
  }
2136
2142
 
2143
+ static VALUE
2144
+ frt_get_doc_range(IndexReader *ir, int pos, int len, int max)
2145
+ {
2146
+ VALUE ary;
2147
+ int i;
2148
+ max = min2(max, pos+len);
2149
+ len = max - pos;
2150
+ ary = rb_ary_new2(len);
2151
+ for (i = 0; i < len; i++) {
2152
+ RARRAY(ary)->ptr[i] = frt_get_lazy_doc(ir->get_lazy_doc(ir, i + pos));
2153
+ }
2154
+ RARRAY(ary)->len = len;
2155
+ return ary;
2156
+ }
2157
+
2137
2158
  /*
2138
2159
  * call-seq:
2139
2160
  * index_reader.get_document(doc_id) -> LazyDoc
@@ -2144,10 +2165,43 @@ frt_ir_undelete_all(VALUE self)
2144
2165
  * which are returned by the Searchers search methods.
2145
2166
  */
2146
2167
  static VALUE
2147
- frt_ir_get_doc(VALUE self, VALUE rdoc_id)
2168
+ frt_ir_get_doc(int argc, VALUE *argv, VALUE self)
2148
2169
  {
2149
2170
  IndexReader *ir = (IndexReader *)DATA_PTR(self);
2150
- return frt_get_lazy_doc(ir->get_lazy_doc(ir, FIX2INT(rdoc_id)));
2171
+ VALUE arg1, arg2;
2172
+ long pos, len;
2173
+ long max = ir->max_doc(ir);
2174
+ rb_scan_args(argc, argv, "11", &arg1, &arg2);
2175
+ if (argc == 1) {
2176
+ if (FIXNUM_P(arg1)) {
2177
+ pos = FIX2INT(arg1);
2178
+ pos = (pos < 0) ? (max + pos) : pos;
2179
+ if (pos < 0 || pos >= max) {
2180
+ rb_raise(rb_eArgError, ":%d is out of range [%d..%d] for "
2181
+ "IndexWriter#[]", pos, 0, max,
2182
+ rb_id2name(SYM2ID(argv)));
2183
+ }
2184
+ return frt_get_lazy_doc(ir->get_lazy_doc(ir, pos));
2185
+ }
2186
+
2187
+ /* check if arg1 is a Range */
2188
+ switch (rb_range_beg_len(arg1, &pos, &len, max, 0)) {
2189
+ case Qfalse:
2190
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for "
2191
+ "IndexReader.get_document(index)",
2192
+ rb_id2name(SYM2ID(argv)));
2193
+ case Qnil:
2194
+ return Qnil;
2195
+ default:
2196
+ return frt_get_doc_range(ir, pos, len, max);
2197
+ }
2198
+ }
2199
+ else {
2200
+ pos = FIX2LONG(arg1);
2201
+ len = FIX2LONG(arg2);
2202
+ return frt_get_doc_range(ir, pos, len, max);
2203
+ }
2204
+ return Qnil;
2151
2205
  }
2152
2206
 
2153
2207
  /*
@@ -3043,8 +3097,8 @@ Init_IndexReader(void)
3043
3097
  rb_define_method(cIndexReader, "num_docs", frt_ir_num_docs, 0);
3044
3098
  rb_define_method(cIndexReader, "undelete_all", frt_ir_undelete_all, 0);
3045
3099
  rb_define_method(cIndexReader, "latest?", frt_ir_is_latest, 0);
3046
- rb_define_method(cIndexReader, "get_document", frt_ir_get_doc, 1);
3047
- rb_define_method(cIndexReader, "[]", frt_ir_get_doc, 1);
3100
+ rb_define_method(cIndexReader, "get_document", frt_ir_get_doc, -1);
3101
+ rb_define_method(cIndexReader, "[]", frt_ir_get_doc, -1);
3048
3102
  rb_define_method(cIndexReader, "term_vector", frt_ir_term_vector, 2);
3049
3103
  rb_define_method(cIndexReader, "term_vectors", frt_ir_term_vectors, 1);
3050
3104
  rb_define_method(cIndexReader, "term_docs", frt_ir_term_docs, 0);