te_rex 0.0.14 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/te_rex/stop_word.rb +70 -52
- data/lib/te_rex/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4f48363d758ea116ca8660068ae10d5cee89ddcf
|
4
|
+
data.tar.gz: 186b23231afc32625bde651dcb32509e4125bd3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 90a71e60cbb0530737c305025ccedcc26e73ea18c478e8ab4223153743fea14250e3768af67fdf94abf87431ad7657f03b106e821004f871351615170309b7c9
|
7
|
+
data.tar.gz: 98057754a7f75100df6176854a948ff07f6e47712777b1fc35716a8bcab3a93962102c093ddb2efa31db5c1c22839edc20111c5a6be3cda74ae0ead4f700d722
|
data/lib/te_rex/stop_word.rb
CHANGED
@@ -1,6 +1,74 @@
|
|
1
1
|
module TeRex
|
2
2
|
class StopWord
|
3
3
|
LIST = [
|
4
|
+
@enterprise && @dates_times && @connector
|
5
|
+
].uniq
|
6
|
+
|
7
|
+
@enterprise = [
|
8
|
+
"-", #bayes_data should handle this but coming through: look at stemmer.
|
9
|
+
"amadeus",
|
10
|
+
"ean",
|
11
|
+
"error",
|
12
|
+
"expedia",
|
13
|
+
"java",
|
14
|
+
"json",
|
15
|
+
"orbitz",
|
16
|
+
"priceline",
|
17
|
+
"sabre",
|
18
|
+
"travelocity",
|
19
|
+
"xml",
|
20
|
+
"xmlst",
|
21
|
+
"xmlws"
|
22
|
+
]
|
23
|
+
@dates_times = [
|
24
|
+
"january",
|
25
|
+
"february",
|
26
|
+
"march",
|
27
|
+
"april",
|
28
|
+
"may",
|
29
|
+
"june",
|
30
|
+
"july",
|
31
|
+
"august",
|
32
|
+
"september",
|
33
|
+
"october",
|
34
|
+
"november",
|
35
|
+
"december",
|
36
|
+
"jan",
|
37
|
+
"feb",
|
38
|
+
"mar",
|
39
|
+
"apr",
|
40
|
+
"aug",
|
41
|
+
"sept",
|
42
|
+
"nov",
|
43
|
+
"dec",
|
44
|
+
"monday",
|
45
|
+
"mon",
|
46
|
+
"tuesday",
|
47
|
+
"tue",
|
48
|
+
"wednesday",
|
49
|
+
"wed",
|
50
|
+
"thursday",
|
51
|
+
"thur",
|
52
|
+
"friday",
|
53
|
+
"fri",
|
54
|
+
"saturday",
|
55
|
+
"sat",
|
56
|
+
"sunday",
|
57
|
+
"sun",
|
58
|
+
"pm",
|
59
|
+
"am",
|
60
|
+
"0",
|
61
|
+
"1",
|
62
|
+
"2",
|
63
|
+
"3",
|
64
|
+
"4",
|
65
|
+
"5",
|
66
|
+
"6",
|
67
|
+
"7",
|
68
|
+
"8",
|
69
|
+
"9"
|
70
|
+
]
|
71
|
+
@connector = [
|
4
72
|
"a",
|
5
73
|
"all",
|
6
74
|
"am",
|
@@ -16,7 +84,6 @@ module TeRex
|
|
16
84
|
"do",
|
17
85
|
"does",
|
18
86
|
"doesn't",
|
19
|
-
"error",
|
20
87
|
"for",
|
21
88
|
"get",
|
22
89
|
"has",
|
@@ -51,58 +118,9 @@ module TeRex
|
|
51
118
|
"when",
|
52
119
|
"which",
|
53
120
|
"with",
|
54
|
-
"xml",
|
55
|
-
"xmlst",
|
56
|
-
"xmlws",
|
57
121
|
"you",
|
58
|
-
"you've"
|
59
|
-
"january",
|
60
|
-
"february",
|
61
|
-
"march",
|
62
|
-
"april",
|
63
|
-
"may",
|
64
|
-
"june",
|
65
|
-
"july",
|
66
|
-
"august",
|
67
|
-
"september",
|
68
|
-
"october",
|
69
|
-
"november",
|
70
|
-
"december",
|
71
|
-
"jan",
|
72
|
-
"feb",
|
73
|
-
"mar",
|
74
|
-
"apr",
|
75
|
-
"aug",
|
76
|
-
"sept",
|
77
|
-
"nov",
|
78
|
-
"dec",
|
79
|
-
"monday",
|
80
|
-
"mon",
|
81
|
-
"tuesday",
|
82
|
-
"tue",
|
83
|
-
"wednesday",
|
84
|
-
"wed",
|
85
|
-
"thursday",
|
86
|
-
"thur",
|
87
|
-
"friday",
|
88
|
-
"fri",
|
89
|
-
"saturday",
|
90
|
-
"sat",
|
91
|
-
"sunday",
|
92
|
-
"sun",
|
93
|
-
"pm",
|
94
|
-
"am",
|
95
|
-
"0",
|
96
|
-
"1",
|
97
|
-
"2",
|
98
|
-
"3",
|
99
|
-
"4",
|
100
|
-
"5",
|
101
|
-
"6",
|
102
|
-
"7",
|
103
|
-
"8",
|
104
|
-
"9",
|
105
|
-
"-" #bayes_data should handle this but coming through: look at stemmer.
|
122
|
+
"you've"
|
106
123
|
]
|
124
|
+
|
107
125
|
end
|
108
126
|
end
|
data/lib/te_rex/version.rb
CHANGED