te_rex 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/te_rex/stop_word.rb +118 -120
- data/lib/te_rex/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8fb50481c19a00e2bebb447eafb28f9ea5371978
|
4
|
+
data.tar.gz: b9aa26933ab953af6a5949dd333c52ad9e04dda4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f3c6a665de08e840db3bb23cfb9465bc3a84f9371ff325fdc01373936029fcc8b73441d2237b8fce4140d5aacfbb4b2796528804a6acd736370377a144044f56
|
7
|
+
data.tar.gz: 72b95f6e90af40b1ab3797e1b82afe9fe76432bedaa81adcea10a3eba48aa4b64ecb36e2ed4e083730bf51fa4019f671d518e6ea29f581db95d2f5cdeb980193
|
data/lib/te_rex/stop_word.rb
CHANGED
@@ -1,126 +1,124 @@
|
|
1
1
|
module TeRex
|
2
2
|
class StopWord
|
3
|
-
LIST = [
|
4
|
-
@enterprise && @dates_times && @connector
|
5
|
-
].uniq
|
6
3
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
4
|
+
ENTERPRISE = [
|
5
|
+
"-", #bayes_data should handle this but coming through: look at stemmer.
|
6
|
+
"amadeus",
|
7
|
+
"ean",
|
8
|
+
"error",
|
9
|
+
"expedia",
|
10
|
+
"java",
|
11
|
+
"json",
|
12
|
+
"orbitz",
|
13
|
+
"priceline",
|
14
|
+
"sabre",
|
15
|
+
"travelocity",
|
16
|
+
"xml",
|
17
|
+
"xmlst",
|
18
|
+
"xmlws"
|
19
|
+
]
|
20
|
+
DATES_TIMES = [
|
21
|
+
"january",
|
22
|
+
"february",
|
23
|
+
"march",
|
24
|
+
"april",
|
25
|
+
"may",
|
26
|
+
"june",
|
27
|
+
"july",
|
28
|
+
"august",
|
29
|
+
"september",
|
30
|
+
"october",
|
31
|
+
"november",
|
32
|
+
"december",
|
33
|
+
"jan",
|
34
|
+
"feb",
|
35
|
+
"mar",
|
36
|
+
"apr",
|
37
|
+
"aug",
|
38
|
+
"sept",
|
39
|
+
"nov",
|
40
|
+
"dec",
|
41
|
+
"monday",
|
42
|
+
"mon",
|
43
|
+
"tuesday",
|
44
|
+
"tue",
|
45
|
+
"wednesday",
|
46
|
+
"wed",
|
47
|
+
"thursday",
|
48
|
+
"thur",
|
49
|
+
"friday",
|
50
|
+
"fri",
|
51
|
+
"saturday",
|
52
|
+
"sat",
|
53
|
+
"sunday",
|
54
|
+
"sun",
|
55
|
+
"pm",
|
56
|
+
"am",
|
57
|
+
"0",
|
58
|
+
"1",
|
59
|
+
"2",
|
60
|
+
"3",
|
61
|
+
"4",
|
62
|
+
"5",
|
63
|
+
"6",
|
64
|
+
"7",
|
65
|
+
"8",
|
66
|
+
"9"
|
67
|
+
]
|
68
|
+
CONNECTOR = [
|
69
|
+
"a",
|
70
|
+
"all",
|
71
|
+
"am",
|
72
|
+
"an",
|
73
|
+
"and",
|
74
|
+
"are",
|
75
|
+
"as",
|
76
|
+
"at",
|
77
|
+
"be",
|
78
|
+
"been",
|
79
|
+
"by",
|
80
|
+
"can",
|
81
|
+
"do",
|
82
|
+
"does",
|
83
|
+
"doesn't",
|
84
|
+
"for",
|
85
|
+
"get",
|
86
|
+
"has",
|
87
|
+
"hotel",
|
88
|
+
"in",
|
89
|
+
"into",
|
90
|
+
"is",
|
91
|
+
"it",
|
92
|
+
"it's",
|
93
|
+
"its",
|
94
|
+
"of",
|
95
|
+
"on",
|
96
|
+
"or",
|
97
|
+
"so",
|
98
|
+
"sorry",
|
99
|
+
"than",
|
100
|
+
"that",
|
101
|
+
"that's",
|
102
|
+
"this",
|
103
|
+
"the",
|
104
|
+
"there",
|
105
|
+
"their",
|
106
|
+
"to",
|
107
|
+
"us",
|
108
|
+
"was",
|
109
|
+
"we",
|
110
|
+
"we're",
|
111
|
+
"were",
|
112
|
+
"what",
|
113
|
+
"what's",
|
114
|
+
"where",
|
115
|
+
"when",
|
116
|
+
"which",
|
117
|
+
"with",
|
118
|
+
"you",
|
119
|
+
"you've"
|
120
|
+
]
|
124
121
|
|
122
|
+
LIST = ENTERPRISE + DATES_TIMES + CONNECTOR
|
125
123
|
end
|
126
124
|
end
|
data/lib/te_rex/version.rb
CHANGED