ferret 0.10.4 → 0.10.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -569,7 +569,7 @@ static TokenStream *
569
569
  frt_get_cwrapped_rts(VALUE rts)
570
570
  {
571
571
  TokenStream *ts;
572
- if (rb_ivar_get(CLASS_OF(rts), id_cclass) == Qtrue) {
572
+ if (frt_is_cclass(rts) && DATA_PTR(rts)) {
573
573
  GET_TS(ts, rts);
574
574
  REF(ts);
575
575
  }
@@ -972,7 +972,7 @@ frt_stop_filter_init(int argc, VALUE *argv, VALUE self)
972
972
  * StemFilter.new(token_stream) -> token_stream
973
973
  * StemFilter.new(token_stream,
974
974
  * algorithm="english",
975
- * encoding=locale-specific) -> token_stream
975
+ * encoding="UTF-8") -> token_stream
976
976
  *
977
977
  * Create a StemFilter which uses a snowball stemmer (thank you, Martin
978
978
  * Porter) to stem words. You can optionally specify the algorithm (default:
@@ -1034,7 +1034,7 @@ static TokenStream *
1034
1034
  cwa_get_ts(Analyzer *a, char *field, char *text)
1035
1035
  {
1036
1036
  VALUE rts = rb_funcall(CWA(a)->ranalyzer, id_token_stream, 2,
1037
- rb_str_new2(field), rb_str_new2(text));
1037
+ rb_intern(field), rb_str_new2(text));
1038
1038
  return frt_get_cwrapped_rts(rts);
1039
1039
  }
1040
1040
 
@@ -1042,7 +1042,7 @@ Analyzer *
1042
1042
  frt_get_cwrapped_analyzer(VALUE ranalyzer)
1043
1043
  {
1044
1044
  Analyzer *a = NULL;
1045
- if (rb_ivar_get(CLASS_OF(ranalyzer), id_cclass) == Qtrue) {
1045
+ if (frt_is_cclass(ranalyzer) && DATA_PTR(ranalyzer)) {
1046
1046
  Data_Get_Struct(ranalyzer, Analyzer, a);
1047
1047
  REF(a);
1048
1048
  }
@@ -1230,7 +1230,7 @@ frt_a_standard_analyzer_init(int argc, VALUE *argv, VALUE self)
1230
1230
  bool lower;
1231
1231
  VALUE rlower, rstop_words;
1232
1232
  Analyzer *a;
1233
- rb_scan_args(argc, argv, "02", &rlower, &rstop_words);
1233
+ rb_scan_args(argc, argv, "02", &rstop_words, &rlower);
1234
1234
  lower = ((rlower == Qnil) ? true : RTEST(rlower));
1235
1235
  if (rstop_words != Qnil) {
1236
1236
  char **stop_words = get_stopwords(rstop_words);
@@ -1246,7 +1246,7 @@ frt_a_standard_analyzer_init(int argc, VALUE *argv, VALUE self)
1246
1246
 
1247
1247
  /*
1248
1248
  * call-seq:
1249
- * StandardAnalyzer.new(lower = true, stop_words = ENGLISH_STOP_WORDS)
1249
+ * StandardAnalyzer.new(stop_words=ENGLISH_STOP_WORDS, lower=true)
1250
1250
  * -> analyzer
1251
1251
  *
1252
1252
  * Create a new StandardAnalyzer which downcases tokens by default but can
@@ -1330,7 +1330,7 @@ frt_per_field_analyzer_add_field(VALUE self, VALUE rfield, VALUE ranalyzer)
1330
1330
  Data_Get_Struct(self, Analyzer, pfa);
1331
1331
  a = frt_get_cwrapped_analyzer(ranalyzer);
1332
1332
 
1333
- pfa_add_field(pfa, StringValuePtr(rfield), a);
1333
+ pfa_add_field(pfa, frt_field(rfield), a);
1334
1334
  return self;
1335
1335
  }
1336
1336
 
@@ -1483,7 +1483,8 @@ static void Init_Token(void)
1483
1483
  */
1484
1484
  static void Init_TokenStream(void)
1485
1485
  {
1486
- cTokenStream = frt_define_class_under(mAnalysis, "TokenStream", rb_cObject);
1486
+ cTokenStream = rb_define_class_under(mAnalysis, "TokenStream", rb_cObject);
1487
+ frt_mark_cclass(cTokenStream);
1487
1488
  rb_define_method(cTokenStream, "next", frt_ts_next, 0);
1488
1489
  rb_define_method(cTokenStream, "text=", frt_ts_set_text, 1);
1489
1490
  rb_define_method(cTokenStream, "text", frt_ts_get_text, 0);
@@ -1504,7 +1505,8 @@ static void Init_TokenStream(void)
1504
1505
  static void Init_AsciiLetterTokenizer(void)
1505
1506
  {
1506
1507
  cAsciiLetterTokenizer =
1507
- frt_define_class_under(mAnalysis, "AsciiLetterTokenizer", cTokenStream);
1508
+ rb_define_class_under(mAnalysis, "AsciiLetterTokenizer", cTokenStream);
1509
+ frt_mark_cclass(cAsciiLetterTokenizer);
1508
1510
  rb_define_alloc_func(cAsciiLetterTokenizer, frt_data_alloc);
1509
1511
  rb_define_method(cAsciiLetterTokenizer, "initialize",
1510
1512
  frt_a_letter_tokenizer_init, 1);
@@ -1526,7 +1528,8 @@ static void Init_AsciiLetterTokenizer(void)
1526
1528
  static void Init_LetterTokenizer(void)
1527
1529
  {
1528
1530
  cLetterTokenizer =
1529
- frt_define_class_under(mAnalysis, "LetterTokenizer", cTokenStream);
1531
+ rb_define_class_under(mAnalysis, "LetterTokenizer", cTokenStream);
1532
+ frt_mark_cclass(cLetterTokenizer);
1530
1533
  rb_define_alloc_func(cLetterTokenizer, frt_data_alloc);
1531
1534
  rb_define_method(cLetterTokenizer, "initialize",
1532
1535
  frt_letter_tokenizer_init, -1);
@@ -1546,8 +1549,9 @@ static void Init_LetterTokenizer(void)
1546
1549
  static void Init_AsciiWhiteSpaceTokenizer(void)
1547
1550
  {
1548
1551
  cAsciiWhiteSpaceTokenizer =
1549
- frt_define_class_under(mAnalysis, "AsciiWhiteSpaceTokenizer",
1552
+ rb_define_class_under(mAnalysis, "AsciiWhiteSpaceTokenizer",
1550
1553
  cTokenStream);
1554
+ frt_mark_cclass(cAsciiWhiteSpaceTokenizer);
1551
1555
  rb_define_alloc_func(cAsciiWhiteSpaceTokenizer, frt_data_alloc);
1552
1556
  rb_define_method(cAsciiWhiteSpaceTokenizer, "initialize",
1553
1557
  frt_a_whitespace_tokenizer_init, 1);
@@ -1567,7 +1571,8 @@ static void Init_AsciiWhiteSpaceTokenizer(void)
1567
1571
  static void Init_WhiteSpaceTokenizer(void)
1568
1572
  {
1569
1573
  cWhiteSpaceTokenizer =
1570
- frt_define_class_under(mAnalysis, "WhiteSpaceTokenizer", cTokenStream);
1574
+ rb_define_class_under(mAnalysis, "WhiteSpaceTokenizer", cTokenStream);
1575
+ frt_mark_cclass(cWhiteSpaceTokenizer);
1571
1576
  rb_define_alloc_func(cWhiteSpaceTokenizer, frt_data_alloc);
1572
1577
  rb_define_method(cWhiteSpaceTokenizer, "initialize",
1573
1578
  frt_whitespace_tokenizer_init, -1);
@@ -1588,7 +1593,8 @@ static void Init_WhiteSpaceTokenizer(void)
1588
1593
  static void Init_AsciiStandardTokenizer(void)
1589
1594
  {
1590
1595
  cAsciiStandardTokenizer =
1591
- frt_define_class_under(mAnalysis, "AsciiStandardTokenizer", cTokenStream);
1596
+ rb_define_class_under(mAnalysis, "AsciiStandardTokenizer", cTokenStream);
1597
+ frt_mark_cclass(cAsciiStandardTokenizer);
1592
1598
  rb_define_alloc_func(cAsciiStandardTokenizer, frt_data_alloc);
1593
1599
  rb_define_method(cAsciiStandardTokenizer, "initialize",
1594
1600
  frt_a_standard_tokenizer_init, 1);
@@ -1609,7 +1615,8 @@ static void Init_AsciiStandardTokenizer(void)
1609
1615
  static void Init_StandardTokenizer(void)
1610
1616
  {
1611
1617
  cStandardTokenizer =
1612
- frt_define_class_under(mAnalysis, "StandardTokenizer", cTokenStream);
1618
+ rb_define_class_under(mAnalysis, "StandardTokenizer", cTokenStream);
1619
+ frt_mark_cclass(cStandardTokenizer);
1613
1620
  rb_define_alloc_func(cStandardTokenizer, frt_data_alloc);
1614
1621
  rb_define_method(cStandardTokenizer, "initialize",
1615
1622
  frt_standard_tokenizer_init, 1);
@@ -1636,7 +1643,8 @@ static void Init_StandardTokenizer(void)
1636
1643
  static void Init_RegExpTokenizer(void)
1637
1644
  {
1638
1645
  cRegExpTokenizer =
1639
- frt_define_class_under(mAnalysis, "RegExpTokenizer", cTokenStream);
1646
+ rb_define_class_under(mAnalysis, "RegExpTokenizer", cTokenStream);
1647
+ frt_mark_cclass(cRegExpTokenizer);
1640
1648
  rtoken_re = rb_reg_new(TOKEN_RE, strlen(TOKEN_RE), 0);
1641
1649
  rb_define_const(cRegExpTokenizer, "REGEXP", rtoken_re);
1642
1650
  rb_define_alloc_func(cRegExpTokenizer, frt_data_alloc);
@@ -1664,7 +1672,8 @@ static void Init_RegExpTokenizer(void)
1664
1672
  static void Init_AsciiLowerCaseFilter(void)
1665
1673
  {
1666
1674
  cAsciiLowerCaseFilter =
1667
- frt_define_class_under(mAnalysis, "AsciiLowerCaseFilter", cTokenStream);
1675
+ rb_define_class_under(mAnalysis, "AsciiLowerCaseFilter", cTokenStream);
1676
+ frt_mark_cclass(cAsciiLowerCaseFilter);
1668
1677
  rb_define_alloc_func(cAsciiLowerCaseFilter, frt_data_alloc);
1669
1678
  rb_define_method(cAsciiLowerCaseFilter, "initialize",
1670
1679
  frt_a_lowercase_filter_init, 1);
@@ -1684,7 +1693,8 @@ static void Init_AsciiLowerCaseFilter(void)
1684
1693
  static void Init_LowerCaseFilter(void)
1685
1694
  {
1686
1695
  cLowerCaseFilter =
1687
- frt_define_class_under(mAnalysis, "LowerCaseFilter", cTokenStream);
1696
+ rb_define_class_under(mAnalysis, "LowerCaseFilter", cTokenStream);
1697
+ frt_mark_cclass(cLowerCaseFilter);
1688
1698
  rb_define_alloc_func(cLowerCaseFilter, frt_data_alloc);
1689
1699
  rb_define_method(cLowerCaseFilter, "initialize",
1690
1700
  frt_lowercase_filter_init, 1);
@@ -1706,7 +1716,8 @@ static void Init_LowerCaseFilter(void)
1706
1716
  static void Init_HyphenFilter(void)
1707
1717
  {
1708
1718
  cHyphenFilter =
1709
- frt_define_class_under(mAnalysis, "HyphenFilter", cTokenStream);
1719
+ rb_define_class_under(mAnalysis, "HyphenFilter", cTokenStream);
1720
+ frt_mark_cclass(cHyphenFilter);
1710
1721
  rb_define_alloc_func(cHyphenFilter, frt_data_alloc);
1711
1722
  rb_define_method(cHyphenFilter, "initialize", frt_hyphen_filter_init, 1);
1712
1723
  }
@@ -1725,7 +1736,8 @@ static void Init_HyphenFilter(void)
1725
1736
  static void Init_StopFilter(void)
1726
1737
  {
1727
1738
  cStopFilter =
1728
- frt_define_class_under(mAnalysis, "StopFilter", cTokenStream);
1739
+ rb_define_class_under(mAnalysis, "StopFilter", cTokenStream);
1740
+ frt_mark_cclass(cStopFilter);
1729
1741
  rb_define_alloc_func(cStopFilter, frt_data_alloc);
1730
1742
  rb_define_method(cStopFilter, "initialize",
1731
1743
  frt_stop_filter_init, -1);
@@ -1738,14 +1750,10 @@ static void Init_StopFilter(void)
1738
1750
  *
1739
1751
  * A StemFilter takes a term and transforms the term as per the SnowBall
1740
1752
  * stemming algorithm. Note: the input to the stemming filter must already
1741
- * be in lower case, so you will need to use LowerCaseFilter or
1742
- * LowerCaseTokenizer further down the Tokenizer chain in order for this to
1743
- * work properly!
1753
+ * be in lower case, so you will need to use LowerCaseFilter or a lowercasing
1754
+ * Tokenizer further down the Tokenizer chain in order for this to work
1755
+ * properly!
1744
1756
  *
1745
- * To use this filter with other analyzers, you'll want to write an Analyzer
1746
- * class that sets up the TokenStream chain as you want it. To use this with
1747
- * LowerCaseTokenizer, for example, you'd write an analyzer like this:
1748
- *
1749
1757
  * === Available algorithms and encodings
1750
1758
  *
1751
1759
  * Algorithm Algorithm Pseudonyms Encoding
@@ -1766,6 +1774,10 @@ static void Init_StopFilter(void)
1766
1774
  *
1767
1775
  * === Example
1768
1776
  *
1777
+ * To use this filter with other analyzers, you'll want to write an Analyzer
1778
+ * class that sets up the TokenStream chain as you want it. To use this with
1779
+ * a lowercasing Tokenizer, for example, you'd write an analyzer like this:
1780
+ *
1769
1781
  * class MyAnalyzer < Analyzer
1770
1782
  * def token_stream(field, str)
1771
1783
  * return StemFilter.new(LowerCaseFilter.new(StandardTokenizer.new(str)))
@@ -1784,7 +1796,8 @@ static void Init_StopFilter(void)
1784
1796
  static void Init_StemFilter(void)
1785
1797
  {
1786
1798
  cStemFilter =
1787
- frt_define_class_under(mAnalysis, "StemFilter", cTokenStream);
1799
+ rb_define_class_under(mAnalysis, "StemFilter", cTokenStream);
1800
+ frt_mark_cclass(cStemFilter);
1788
1801
  rb_define_alloc_func(cStemFilter, frt_data_alloc);
1789
1802
  rb_define_method(cStemFilter, "initialize",
1790
1803
  frt_stem_filter_init, -1);
@@ -1827,7 +1840,8 @@ static void Init_StemFilter(void)
1827
1840
  static void Init_Analyzer(void)
1828
1841
  {
1829
1842
  cAnalyzer =
1830
- frt_define_class_under(mAnalysis, "Analyzer", rb_cObject);
1843
+ rb_define_class_under(mAnalysis, "Analyzer", rb_cObject);
1844
+ frt_mark_cclass(cAnalyzer);
1831
1845
  rb_define_alloc_func(cAnalyzer, frt_data_alloc);
1832
1846
  rb_define_method(cAnalyzer, "initialize", frt_letter_analyzer_init, -1);
1833
1847
  rb_define_method(cAnalyzer, "token_stream", frt_analyzer_token_stream, 2);
@@ -1864,7 +1878,8 @@ static void Init_Analyzer(void)
1864
1878
  static void Init_AsciiLetterAnalyzer(void)
1865
1879
  {
1866
1880
  cAsciiLetterAnalyzer =
1867
- frt_define_class_under(mAnalysis, "AsciiLetterAnalyzer", cAnalyzer);
1881
+ rb_define_class_under(mAnalysis, "AsciiLetterAnalyzer", cAnalyzer);
1882
+ frt_mark_cclass(cAsciiLetterAnalyzer);
1868
1883
  rb_define_alloc_func(cAsciiLetterAnalyzer, frt_data_alloc);
1869
1884
  rb_define_method(cAsciiLetterAnalyzer, "initialize",
1870
1885
  frt_a_letter_analyzer_init, -1);
@@ -1894,7 +1909,8 @@ static void Init_AsciiLetterAnalyzer(void)
1894
1909
  static void Init_LetterAnalyzer(void)
1895
1910
  {
1896
1911
  cLetterAnalyzer =
1897
- frt_define_class_under(mAnalysis, "LetterAnalyzer", cAnalyzer);
1912
+ rb_define_class_under(mAnalysis, "LetterAnalyzer", cAnalyzer);
1913
+ frt_mark_cclass(cLetterAnalyzer);
1898
1914
  rb_define_alloc_func(cLetterAnalyzer, frt_data_alloc);
1899
1915
  rb_define_method(cLetterAnalyzer, "initialize",
1900
1916
  frt_letter_analyzer_init, -1);
@@ -1930,7 +1946,8 @@ static void Init_LetterAnalyzer(void)
1930
1946
  static void Init_AsciiWhiteSpaceAnalyzer(void)
1931
1947
  {
1932
1948
  cAsciiWhiteSpaceAnalyzer =
1933
- frt_define_class_under(mAnalysis, "AsciiWhiteSpaceAnalyzer", cAnalyzer);
1949
+ rb_define_class_under(mAnalysis, "AsciiWhiteSpaceAnalyzer", cAnalyzer);
1950
+ frt_mark_cclass(cAsciiWhiteSpaceAnalyzer);
1934
1951
  rb_define_alloc_func(cAsciiWhiteSpaceAnalyzer, frt_data_alloc);
1935
1952
  rb_define_method(cAsciiWhiteSpaceAnalyzer, "initialize",
1936
1953
  frt_a_white_space_analyzer_init, -1);
@@ -1960,7 +1977,8 @@ static void Init_AsciiWhiteSpaceAnalyzer(void)
1960
1977
  static void Init_WhiteSpaceAnalyzer(void)
1961
1978
  {
1962
1979
  cWhiteSpaceAnalyzer =
1963
- frt_define_class_under(mAnalysis, "WhiteSpaceAnalyzer", cAnalyzer);
1980
+ rb_define_class_under(mAnalysis, "WhiteSpaceAnalyzer", cAnalyzer);
1981
+ frt_mark_cclass(cWhiteSpaceAnalyzer);
1964
1982
  rb_define_alloc_func(cWhiteSpaceAnalyzer, frt_data_alloc);
1965
1983
  rb_define_method(cWhiteSpaceAnalyzer, "initialize",
1966
1984
  frt_white_space_analyzer_init, -1);
@@ -1975,18 +1993,16 @@ static void Init_WhiteSpaceAnalyzer(void)
1975
1993
  * ascii-analyzers. If it were implemented in Ruby it would look like this:
1976
1994
  *
1977
1995
  * class AsciiStandardAnalyzer
1978
- * def initialize(lower = true, stop_words = ENGLISH_STOP_WORDS)
1996
+ * def initialize(stop_words = ENGLISH_STOP_WORDS, lower = true)
1979
1997
  * @lower = lower
1980
1998
  * @stop_words = stop_words
1981
1999
  * end
1982
2000
  *
1983
2001
  * def token_stream(field, str)
1984
- * if @lower
1985
- * return StopFilter.new(AsciiLowerCaseFilter.new(
1986
- * AsciiStandardTokenizer.new(str)), @stop_words)
1987
- * else
1988
- * return StopFilter.new(AsciiStandardTokenizer.new(str), @stop_words)
1989
- * end
2002
+ * ts = AsciiStandardTokenizer.new(str)
2003
+ * ts = AsciiLowerCaseFilter.new(ts) if @lower
2004
+ * ts = StopFilter.new(ts, @stop_words)
2005
+ * ts = HyphenFilter.new(ts)
1990
2006
  * end
1991
2007
  * end
1992
2008
  *
@@ -1998,7 +2014,8 @@ static void Init_WhiteSpaceAnalyzer(void)
1998
2014
  static void Init_AsciiStandardAnalyzer(void)
1999
2015
  {
2000
2016
  cAsciiStandardAnalyzer =
2001
- frt_define_class_under(mAnalysis, "AsciiStandardAnalyzer", cAnalyzer);
2017
+ rb_define_class_under(mAnalysis, "AsciiStandardAnalyzer", cAnalyzer);
2018
+ frt_mark_cclass(cAsciiStandardAnalyzer);
2002
2019
  rb_define_alloc_func(cAsciiStandardAnalyzer, frt_data_alloc);
2003
2020
  rb_define_method(cAsciiStandardAnalyzer, "initialize",
2004
2021
  frt_a_standard_analyzer_init, -1);
@@ -2013,13 +2030,16 @@ static void Init_AsciiStandardAnalyzer(void)
2013
2030
  * it were implemented in Ruby it would look like this:
2014
2031
  *
2015
2032
  * class StandardAnalyzer
2016
- * def initialize(lower = true, stop_words = ENGLISH_STOP_WORDS)
2033
+ * def initialize(stop_words = ENGLISH_STOP_WORDS, lower = true)
2017
2034
  * @lower = lower
2018
2035
  * @stop_words = stop_words
2019
2036
  * end
2020
2037
  *
2021
2038
  * def token_stream(field, str)
2022
- * return StopFilter.new(StandardTokenizer.new(str, @lower), @stop_words)
2039
+ * ts = StandardTokenizer.new(str)
2040
+ * ts = LowerCaseFilter.new(ts) if @lower
2041
+ * ts = StopFilter.new(ts, @stop_words)
2042
+ * ts = HyphenFilter.new(ts)
2023
2043
  * end
2024
2044
  * end
2025
2045
  *
@@ -2029,7 +2049,8 @@ static void Init_AsciiStandardAnalyzer(void)
2029
2049
  static void Init_StandardAnalyzer(void)
2030
2050
  {
2031
2051
  cStandardAnalyzer =
2032
- frt_define_class_under(mAnalysis, "StandardAnalyzer", cAnalyzer);
2052
+ rb_define_class_under(mAnalysis, "StandardAnalyzer", cAnalyzer);
2053
+ frt_mark_cclass(cStandardAnalyzer);
2033
2054
  rb_define_alloc_func(cStandardAnalyzer, frt_data_alloc);
2034
2055
  rb_define_method(cStandardAnalyzer, "initialize",
2035
2056
  frt_standard_analyzer_init, -1);
@@ -2058,7 +2079,8 @@ static void Init_StandardAnalyzer(void)
2058
2079
  static void Init_PerFieldAnalyzer(void)
2059
2080
  {
2060
2081
  cPerFieldAnalyzer =
2061
- frt_define_class_under(mAnalysis, "PerFieldAnalyzer", cAnalyzer);
2082
+ rb_define_class_under(mAnalysis, "PerFieldAnalyzer", cAnalyzer);
2083
+ frt_mark_cclass(cPerFieldAnalyzer);
2062
2084
  rb_define_alloc_func(cPerFieldAnalyzer, frt_data_alloc);
2063
2085
  rb_define_method(cPerFieldAnalyzer, "initialize",
2064
2086
  frt_per_field_analyzer_init, 1);
@@ -2098,7 +2120,8 @@ static void Init_PerFieldAnalyzer(void)
2098
2120
  static void Init_RegExpAnalyzer(void)
2099
2121
  {
2100
2122
  cRegExpAnalyzer =
2101
- frt_define_class_under(mAnalysis, "RegExpAnalyzer", cAnalyzer);
2123
+ rb_define_class_under(mAnalysis, "RegExpAnalyzer", cAnalyzer);
2124
+ frt_mark_cclass(cRegExpAnalyzer);
2102
2125
  rb_define_alloc_func(cRegExpAnalyzer, frt_data_alloc);
2103
2126
  rb_define_method(cRegExpAnalyzer, "initialize",
2104
2127
  frt_re_analyzer_init, -1);
@@ -94,10 +94,12 @@ frt_fi_get_params(VALUE roptions,
94
94
  if (Qnil != v) Check_Type(v, T_SYMBOL);
95
95
  if (v == sym_no || v == sym_false || v == Qfalse) {
96
96
  *store = STORE_NO;
97
- } else if (v == sym_yes || v == sym_true || v == Qtrue || v == Qnil) {
97
+ } else if (v == sym_yes || v == sym_true || v == Qtrue) {
98
98
  *store = STORE_YES;
99
99
  } else if (v == sym_compress || v == sym_compressed) {
100
100
  *store = STORE_COMPRESS;
101
+ } else if (v == Qnil) {
102
+ /* leave as default */
101
103
  } else {
102
104
  rb_raise(rb_eArgError, ":%s isn't a valid argument for :store."
103
105
  " Please choose from [:yes, :no, :compressed]",
@@ -108,7 +110,7 @@ frt_fi_get_params(VALUE roptions,
108
110
  if (Qnil != v) Check_Type(v, T_SYMBOL);
109
111
  if (v == sym_no || v == sym_false || v == Qfalse) {
110
112
  *index = INDEX_NO;
111
- } else if (v == sym_yes || v == sym_true || v == Qtrue || v == Qnil) {
113
+ } else if (v == sym_yes || v == sym_true || v == Qtrue) {
112
114
  *index = INDEX_YES;
113
115
  } else if (v == sym_untokenized) {
114
116
  *index = INDEX_UNTOKENIZED;
@@ -116,6 +118,8 @@ frt_fi_get_params(VALUE roptions,
116
118
  *index = INDEX_YES_OMIT_NORMS;
117
119
  } else if (v == sym_untokenized_omit_norms) {
118
120
  *index = INDEX_UNTOKENIZED_OMIT_NORMS;
121
+ } else if (v == Qnil) {
122
+ /* leave as default */
119
123
  } else {
120
124
  rb_raise(rb_eArgError, ":%s isn't a valid argument for :index."
121
125
  " Please choose from [:no, :yes, :untokenized, "
@@ -133,8 +137,10 @@ frt_fi_get_params(VALUE roptions,
133
137
  *term_vector = TERM_VECTOR_WITH_POSITIONS;
134
138
  } else if (v == sym_with_offsets) {
135
139
  *term_vector = TERM_VECTOR_WITH_OFFSETS;
136
- } else if (v == sym_with_positions_offsets || v == Qnil) {
140
+ } else if (v == sym_with_positions_offsets) {
137
141
  *term_vector = TERM_VECTOR_WITH_POSITIONS_OFFSETS;
142
+ } else if (v == Qnil) {
143
+ /* leave as default */
138
144
  } else {
139
145
  rb_raise(rb_eArgError, ":%s isn't a valid argument for "
140
146
  ":term_vector. Please choose from [:no, :yes, "
@@ -507,9 +513,9 @@ frt_fis_add_field(int argc, VALUE *argv, VALUE self)
507
513
  {
508
514
  FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
509
515
  FieldInfo *fi;
510
- enum StoreValues store = STORE_YES;
511
- enum IndexValues index = INDEX_YES;
512
- enum TermVectorValues term_vector = TERM_VECTOR_WITH_POSITIONS_OFFSETS;
516
+ enum StoreValues store = fis->store;
517
+ enum IndexValues index = fis->index;
518
+ enum TermVectorValues term_vector = fis->term_vector;
513
519
  float boost = 1.0f;
514
520
  VALUE rname, roptions;
515
521
 
@@ -2134,6 +2140,21 @@ frt_ir_undelete_all(VALUE self)
2134
2140
  return self;
2135
2141
  }
2136
2142
 
2143
+ static VALUE
2144
+ frt_get_doc_range(IndexReader *ir, int pos, int len, int max)
2145
+ {
2146
+ VALUE ary;
2147
+ int i;
2148
+ max = min2(max, pos+len);
2149
+ len = max - pos;
2150
+ ary = rb_ary_new2(len);
2151
+ for (i = 0; i < len; i++) {
2152
+ RARRAY(ary)->ptr[i] = frt_get_lazy_doc(ir->get_lazy_doc(ir, i + pos));
2153
+ }
2154
+ RARRAY(ary)->len = len;
2155
+ return ary;
2156
+ }
2157
+
2137
2158
  /*
2138
2159
  * call-seq:
2139
2160
  * index_reader.get_document(doc_id) -> LazyDoc
@@ -2144,10 +2165,43 @@ frt_ir_undelete_all(VALUE self)
2144
2165
  * which are returned by the Searchers search methods.
2145
2166
  */
2146
2167
  static VALUE
2147
- frt_ir_get_doc(VALUE self, VALUE rdoc_id)
2168
+ frt_ir_get_doc(int argc, VALUE *argv, VALUE self)
2148
2169
  {
2149
2170
  IndexReader *ir = (IndexReader *)DATA_PTR(self);
2150
- return frt_get_lazy_doc(ir->get_lazy_doc(ir, FIX2INT(rdoc_id)));
2171
+ VALUE arg1, arg2;
2172
+ long pos, len;
2173
+ long max = ir->max_doc(ir);
2174
+ rb_scan_args(argc, argv, "11", &arg1, &arg2);
2175
+ if (argc == 1) {
2176
+ if (FIXNUM_P(arg1)) {
2177
+ pos = FIX2INT(arg1);
2178
+ pos = (pos < 0) ? (max + pos) : pos;
2179
+ if (pos < 0 || pos >= max) {
2180
+ rb_raise(rb_eArgError, ":%d is out of range [%d..%d] for "
2181
+ "IndexWriter#[]", pos, 0, max,
2182
+ rb_id2name(SYM2ID(argv)));
2183
+ }
2184
+ return frt_get_lazy_doc(ir->get_lazy_doc(ir, pos));
2185
+ }
2186
+
2187
+ /* check if arg1 is a Range */
2188
+ switch (rb_range_beg_len(arg1, &pos, &len, max, 0)) {
2189
+ case Qfalse:
2190
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for "
2191
+ "IndexReader.get_document(index)",
2192
+ rb_id2name(SYM2ID(argv)));
2193
+ case Qnil:
2194
+ return Qnil;
2195
+ default:
2196
+ return frt_get_doc_range(ir, pos, len, max);
2197
+ }
2198
+ }
2199
+ else {
2200
+ pos = FIX2LONG(arg1);
2201
+ len = FIX2LONG(arg2);
2202
+ return frt_get_doc_range(ir, pos, len, max);
2203
+ }
2204
+ return Qnil;
2151
2205
  }
2152
2206
 
2153
2207
  /*
@@ -3043,8 +3097,8 @@ Init_IndexReader(void)
3043
3097
  rb_define_method(cIndexReader, "num_docs", frt_ir_num_docs, 0);
3044
3098
  rb_define_method(cIndexReader, "undelete_all", frt_ir_undelete_all, 0);
3045
3099
  rb_define_method(cIndexReader, "latest?", frt_ir_is_latest, 0);
3046
- rb_define_method(cIndexReader, "get_document", frt_ir_get_doc, 1);
3047
- rb_define_method(cIndexReader, "[]", frt_ir_get_doc, 1);
3100
+ rb_define_method(cIndexReader, "get_document", frt_ir_get_doc, -1);
3101
+ rb_define_method(cIndexReader, "[]", frt_ir_get_doc, -1);
3048
3102
  rb_define_method(cIndexReader, "term_vector", frt_ir_term_vector, 2);
3049
3103
  rb_define_method(cIndexReader, "term_vectors", frt_ir_term_vectors, 1);
3050
3104
  rb_define_method(cIndexReader, "term_docs", frt_ir_term_docs, 0);