wordlist 0.1.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/ruby.yml +27 -0
- data/.gitignore +6 -3
- data/ChangeLog.md +45 -1
- data/Gemfile +13 -0
- data/LICENSE.txt +1 -3
- data/README.md +266 -61
- data/Rakefile +7 -32
- data/benchmarks.rb +115 -0
- data/bin/wordlist +4 -7
- data/data/stop_words/ar.txt +104 -0
- data/data/stop_words/bg.txt +259 -0
- data/data/stop_words/bn.txt +363 -0
- data/data/stop_words/ca.txt +126 -0
- data/data/stop_words/cs.txt +138 -0
- data/data/stop_words/da.txt +101 -0
- data/data/stop_words/de.txt +129 -0
- data/data/stop_words/el.txt +79 -0
- data/data/stop_words/en.txt +175 -0
- data/data/stop_words/es.txt +178 -0
- data/data/stop_words/eu.txt +98 -0
- data/data/stop_words/fa.txt +332 -0
- data/data/stop_words/fi.txt +747 -0
- data/data/stop_words/fr.txt +116 -0
- data/data/stop_words/ga.txt +109 -0
- data/data/stop_words/gl.txt +160 -0
- data/data/stop_words/he.txt +499 -0
- data/data/stop_words/hi.txt +97 -0
- data/data/stop_words/hr.txt +179 -0
- data/data/stop_words/hu.txt +35 -0
- data/data/stop_words/hy.txt +45 -0
- data/data/stop_words/id.txt +357 -0
- data/data/stop_words/it.txt +134 -0
- data/data/stop_words/ja.txt +44 -0
- data/data/stop_words/ko.txt +677 -0
- data/data/stop_words/ku.txt +63 -0
- data/data/stop_words/lt.txt +507 -0
- data/data/stop_words/lv.txt +163 -0
- data/data/stop_words/mr.txt +99 -0
- data/data/stop_words/nl.txt +48 -0
- data/data/stop_words/no.txt +172 -0
- data/data/stop_words/pl.txt +138 -0
- data/data/stop_words/pt.txt +147 -0
- data/data/stop_words/ro.txt +281 -0
- data/data/stop_words/ru.txt +421 -0
- data/data/stop_words/sk.txt +173 -0
- data/data/stop_words/sv.txt +386 -0
- data/data/stop_words/th.txt +115 -0
- data/data/stop_words/tr.txt +114 -0
- data/data/stop_words/uk.txt +28 -0
- data/data/stop_words/ur.txt +513 -0
- data/data/stop_words/zh.txt +125 -0
- data/gemspec.yml +4 -10
- data/lib/wordlist/abstract_wordlist.rb +24 -0
- data/lib/wordlist/builder.rb +170 -138
- data/lib/wordlist/cli.rb +458 -0
- data/lib/wordlist/compression/reader.rb +72 -0
- data/lib/wordlist/compression/writer.rb +80 -0
- data/lib/wordlist/exceptions.rb +31 -0
- data/lib/wordlist/file.rb +176 -0
- data/lib/wordlist/format.rb +38 -0
- data/lib/wordlist/lexer/lang.rb +32 -0
- data/lib/wordlist/lexer/stop_words.rb +68 -0
- data/lib/wordlist/lexer.rb +218 -0
- data/lib/wordlist/list_methods.rb +462 -0
- data/lib/wordlist/modifiers/capitalize.rb +45 -0
- data/lib/wordlist/modifiers/downcase.rb +45 -0
- data/lib/wordlist/modifiers/gsub.rb +51 -0
- data/lib/wordlist/modifiers/modifier.rb +44 -0
- data/lib/wordlist/modifiers/mutate.rb +133 -0
- data/lib/wordlist/modifiers/mutate_case.rb +25 -0
- data/lib/wordlist/modifiers/sub.rb +97 -0
- data/lib/wordlist/modifiers/tr.rb +71 -0
- data/lib/wordlist/modifiers/upcase.rb +45 -0
- data/lib/wordlist/modifiers.rb +8 -0
- data/lib/wordlist/operators/binary_operator.rb +38 -0
- data/lib/wordlist/operators/concat.rb +47 -0
- data/lib/wordlist/operators/intersect.rb +55 -0
- data/lib/wordlist/operators/operator.rb +29 -0
- data/lib/wordlist/operators/power.rb +72 -0
- data/lib/wordlist/operators/product.rb +50 -0
- data/lib/wordlist/operators/subtract.rb +54 -0
- data/lib/wordlist/operators/unary_operator.rb +29 -0
- data/lib/wordlist/operators/union.rb +61 -0
- data/lib/wordlist/operators/unique.rb +52 -0
- data/lib/wordlist/operators.rb +7 -0
- data/lib/wordlist/unique_filter.rb +40 -61
- data/lib/wordlist/version.rb +1 -1
- data/lib/wordlist/words.rb +71 -0
- data/lib/wordlist.rb +103 -2
- data/spec/abstract_list_spec.rb +18 -0
- data/spec/builder_spec.rb +220 -76
- data/spec/cli_spec.rb +801 -0
- data/spec/compression/reader_spec.rb +137 -0
- data/spec/compression/writer_spec.rb +194 -0
- data/spec/file_spec.rb +258 -0
- data/spec/fixtures/wordlist.txt +15 -0
- data/spec/fixtures/wordlist.txt.bz2 +0 -0
- data/spec/fixtures/wordlist.txt.gz +0 -0
- data/spec/fixtures/wordlist.txt.xz +0 -0
- data/spec/fixtures/wordlist_with_ambiguous_format +3 -0
- data/spec/fixtures/wordlist_with_comments.txt +19 -0
- data/spec/fixtures/wordlist_with_empty_lines.txt +19 -0
- data/spec/format_spec.rb +50 -0
- data/spec/helpers/text.rb +3 -3
- data/spec/helpers/wordlist.rb +2 -2
- data/spec/lexer/lang_spec.rb +70 -0
- data/spec/lexer/stop_words_spec.rb +77 -0
- data/spec/lexer_spec.rb +652 -0
- data/spec/list_methods_spec.rb +181 -0
- data/spec/modifiers/capitalize_spec.rb +27 -0
- data/spec/modifiers/downcase_spec.rb +27 -0
- data/spec/modifiers/gsub_spec.rb +59 -0
- data/spec/modifiers/modifier_spec.rb +20 -0
- data/spec/modifiers/mutate_case_spec.rb +46 -0
- data/spec/modifiers/mutate_spec.rb +39 -0
- data/spec/modifiers/sub_spec.rb +98 -0
- data/spec/modifiers/tr_spec.rb +46 -0
- data/spec/modifiers/upcase_spec.rb +27 -0
- data/spec/operators/binary_operator_spec.rb +19 -0
- data/spec/operators/concat_spec.rb +26 -0
- data/spec/operators/intersect_spec.rb +37 -0
- data/spec/operators/operator_spec.rb +16 -0
- data/spec/operators/power_spec.rb +57 -0
- data/spec/operators/product_spec.rb +39 -0
- data/spec/operators/subtract_spec.rb +37 -0
- data/spec/operators/union_spec.rb +37 -0
- data/spec/operators/unique_spec.rb +25 -0
- data/spec/spec_helper.rb +2 -1
- data/spec/unique_filter_spec.rb +108 -18
- data/spec/wordlist_spec.rb +55 -3
- data/spec/words_spec.rb +41 -0
- metadata +183 -120
- data/lib/wordlist/builders/website.rb +0 -216
- data/lib/wordlist/builders.rb +0 -1
- data/lib/wordlist/flat_file.rb +0 -47
- data/lib/wordlist/list.rb +0 -162
- data/lib/wordlist/mutator.rb +0 -113
- data/lib/wordlist/parsers.rb +0 -74
- data/lib/wordlist/runners/list.rb +0 -116
- data/lib/wordlist/runners/runner.rb +0 -67
- data/lib/wordlist/runners.rb +0 -2
- data/scripts/benchmark +0 -59
- data/scripts/text/comedy_of_errors.txt +0 -4011
- data/spec/flat_file_spec.rb +0 -25
- data/spec/list_spec.rb +0 -58
- data/spec/mutator_spec.rb +0 -43
- data/spec/parsers_spec.rb +0 -118
@@ -0,0 +1,116 @@
|
|
1
|
+
alors
|
2
|
+
au
|
3
|
+
aucuns
|
4
|
+
aussi
|
5
|
+
autre
|
6
|
+
avant
|
7
|
+
avec
|
8
|
+
avoir
|
9
|
+
bon
|
10
|
+
car
|
11
|
+
ce
|
12
|
+
cela
|
13
|
+
ces
|
14
|
+
ceux
|
15
|
+
chaque
|
16
|
+
ci
|
17
|
+
comme
|
18
|
+
comment
|
19
|
+
dans
|
20
|
+
des
|
21
|
+
du
|
22
|
+
dedans
|
23
|
+
dehors
|
24
|
+
depuis
|
25
|
+
devrait
|
26
|
+
doit
|
27
|
+
donc
|
28
|
+
dos
|
29
|
+
début
|
30
|
+
elle
|
31
|
+
elles
|
32
|
+
en
|
33
|
+
encore
|
34
|
+
essai
|
35
|
+
est
|
36
|
+
et
|
37
|
+
eu
|
38
|
+
fait
|
39
|
+
faites
|
40
|
+
fois
|
41
|
+
font
|
42
|
+
hors
|
43
|
+
ici
|
44
|
+
il
|
45
|
+
ils
|
46
|
+
je
|
47
|
+
juste
|
48
|
+
la
|
49
|
+
le
|
50
|
+
les
|
51
|
+
leur
|
52
|
+
là
|
53
|
+
ma
|
54
|
+
maintenant
|
55
|
+
mais
|
56
|
+
mes
|
57
|
+
mien
|
58
|
+
moins
|
59
|
+
mon
|
60
|
+
mot
|
61
|
+
même
|
62
|
+
ni
|
63
|
+
nommés
|
64
|
+
notre
|
65
|
+
nous
|
66
|
+
ou
|
67
|
+
où
|
68
|
+
par
|
69
|
+
parce
|
70
|
+
pas
|
71
|
+
peut
|
72
|
+
peu
|
73
|
+
plupart
|
74
|
+
pour
|
75
|
+
pourquoi
|
76
|
+
quand
|
77
|
+
que
|
78
|
+
quel
|
79
|
+
quelle
|
80
|
+
quelles
|
81
|
+
quels
|
82
|
+
qui
|
83
|
+
sa
|
84
|
+
sans
|
85
|
+
ses
|
86
|
+
seulement
|
87
|
+
si
|
88
|
+
sien
|
89
|
+
son
|
90
|
+
sont
|
91
|
+
sous
|
92
|
+
soyez
|
93
|
+
sujet
|
94
|
+
sur
|
95
|
+
ta
|
96
|
+
tandis
|
97
|
+
tellement
|
98
|
+
tels
|
99
|
+
tes
|
100
|
+
ton
|
101
|
+
tous
|
102
|
+
tout
|
103
|
+
trop
|
104
|
+
très
|
105
|
+
tu
|
106
|
+
voient
|
107
|
+
vont
|
108
|
+
votre
|
109
|
+
vous
|
110
|
+
vu
|
111
|
+
ça
|
112
|
+
étaient
|
113
|
+
état
|
114
|
+
étions
|
115
|
+
été
|
116
|
+
être
|
@@ -0,0 +1,109 @@
|
|
1
|
+
a
|
2
|
+
ach
|
3
|
+
ag
|
4
|
+
agus
|
5
|
+
an
|
6
|
+
aon
|
7
|
+
ar
|
8
|
+
arna
|
9
|
+
as
|
10
|
+
b'
|
11
|
+
ba
|
12
|
+
beirt
|
13
|
+
bhúr
|
14
|
+
caoga
|
15
|
+
ceathair
|
16
|
+
ceathrar
|
17
|
+
chomh
|
18
|
+
chtó
|
19
|
+
chuig
|
20
|
+
chun
|
21
|
+
cois
|
22
|
+
céad
|
23
|
+
cúig
|
24
|
+
cúigear
|
25
|
+
d'
|
26
|
+
daichead
|
27
|
+
dar
|
28
|
+
de
|
29
|
+
deich
|
30
|
+
deichniúr
|
31
|
+
den
|
32
|
+
dhá
|
33
|
+
do
|
34
|
+
don
|
35
|
+
dtí
|
36
|
+
dá
|
37
|
+
dár
|
38
|
+
dó
|
39
|
+
faoi
|
40
|
+
faoin
|
41
|
+
faoina
|
42
|
+
faoinár
|
43
|
+
fara
|
44
|
+
fiche
|
45
|
+
gach
|
46
|
+
gan
|
47
|
+
go
|
48
|
+
gur
|
49
|
+
haon
|
50
|
+
hocht
|
51
|
+
i
|
52
|
+
iad
|
53
|
+
idir
|
54
|
+
in
|
55
|
+
ina
|
56
|
+
ins
|
57
|
+
inár
|
58
|
+
is
|
59
|
+
le
|
60
|
+
leis
|
61
|
+
lena
|
62
|
+
lenár
|
63
|
+
m'
|
64
|
+
mar
|
65
|
+
mo
|
66
|
+
mé
|
67
|
+
na
|
68
|
+
nach
|
69
|
+
naoi
|
70
|
+
naonúr
|
71
|
+
ná
|
72
|
+
ní
|
73
|
+
níor
|
74
|
+
nó
|
75
|
+
nócha
|
76
|
+
ocht
|
77
|
+
ochtar
|
78
|
+
os
|
79
|
+
roimh
|
80
|
+
sa
|
81
|
+
seacht
|
82
|
+
seachtar
|
83
|
+
seachtó
|
84
|
+
seasca
|
85
|
+
seisear
|
86
|
+
siad
|
87
|
+
sibh
|
88
|
+
sinn
|
89
|
+
sna
|
90
|
+
sé
|
91
|
+
sí
|
92
|
+
tar
|
93
|
+
thar
|
94
|
+
thú
|
95
|
+
triúr
|
96
|
+
trí
|
97
|
+
trína
|
98
|
+
trínár
|
99
|
+
tríocha
|
100
|
+
tú
|
101
|
+
um
|
102
|
+
ár
|
103
|
+
é
|
104
|
+
éis
|
105
|
+
í
|
106
|
+
ó
|
107
|
+
ón
|
108
|
+
óna
|
109
|
+
ónár
|
@@ -0,0 +1,160 @@
|
|
1
|
+
a
|
2
|
+
aínda
|
3
|
+
alí
|
4
|
+
aquel
|
5
|
+
aquela
|
6
|
+
aquelas
|
7
|
+
aqueles
|
8
|
+
aquilo
|
9
|
+
aquí
|
10
|
+
ao
|
11
|
+
aos
|
12
|
+
as
|
13
|
+
así
|
14
|
+
á
|
15
|
+
ben
|
16
|
+
cando
|
17
|
+
che
|
18
|
+
co
|
19
|
+
coa
|
20
|
+
comigo
|
21
|
+
con
|
22
|
+
connosco
|
23
|
+
contigo
|
24
|
+
convosco
|
25
|
+
coas
|
26
|
+
cos
|
27
|
+
cun
|
28
|
+
cuns
|
29
|
+
cunha
|
30
|
+
cunhas
|
31
|
+
da
|
32
|
+
dalgunha
|
33
|
+
dalgunhas
|
34
|
+
dalgún
|
35
|
+
dalgúns
|
36
|
+
das
|
37
|
+
de
|
38
|
+
del
|
39
|
+
dela
|
40
|
+
delas
|
41
|
+
deles
|
42
|
+
desde
|
43
|
+
deste
|
44
|
+
do
|
45
|
+
dos
|
46
|
+
dun
|
47
|
+
duns
|
48
|
+
dunha
|
49
|
+
dunhas
|
50
|
+
e
|
51
|
+
el
|
52
|
+
ela
|
53
|
+
elas
|
54
|
+
eles
|
55
|
+
en
|
56
|
+
era
|
57
|
+
eran
|
58
|
+
esa
|
59
|
+
esas
|
60
|
+
ese
|
61
|
+
eses
|
62
|
+
esta
|
63
|
+
estar
|
64
|
+
estaba
|
65
|
+
está
|
66
|
+
están
|
67
|
+
este
|
68
|
+
estes
|
69
|
+
estiven
|
70
|
+
estou
|
71
|
+
eu
|
72
|
+
é
|
73
|
+
facer
|
74
|
+
foi
|
75
|
+
foron
|
76
|
+
fun
|
77
|
+
había
|
78
|
+
hai
|
79
|
+
iso
|
80
|
+
isto
|
81
|
+
la
|
82
|
+
las
|
83
|
+
lle
|
84
|
+
lles
|
85
|
+
lo
|
86
|
+
los
|
87
|
+
mais
|
88
|
+
me
|
89
|
+
meu
|
90
|
+
meus
|
91
|
+
min
|
92
|
+
miña
|
93
|
+
miñas
|
94
|
+
moi
|
95
|
+
na
|
96
|
+
nas
|
97
|
+
neste
|
98
|
+
nin
|
99
|
+
no
|
100
|
+
non
|
101
|
+
nos
|
102
|
+
nosa
|
103
|
+
nosas
|
104
|
+
noso
|
105
|
+
nosos
|
106
|
+
nós
|
107
|
+
nun
|
108
|
+
nunha
|
109
|
+
nuns
|
110
|
+
nunhas
|
111
|
+
o
|
112
|
+
os
|
113
|
+
ou
|
114
|
+
ó
|
115
|
+
ós
|
116
|
+
para
|
117
|
+
pero
|
118
|
+
pode
|
119
|
+
pois
|
120
|
+
pola
|
121
|
+
polas
|
122
|
+
polo
|
123
|
+
polos
|
124
|
+
por
|
125
|
+
que
|
126
|
+
se
|
127
|
+
senón
|
128
|
+
ser
|
129
|
+
seu
|
130
|
+
seus
|
131
|
+
sexa
|
132
|
+
sido
|
133
|
+
sobre
|
134
|
+
súa
|
135
|
+
súas
|
136
|
+
tamén
|
137
|
+
tan
|
138
|
+
te
|
139
|
+
ten
|
140
|
+
teñen
|
141
|
+
teño
|
142
|
+
ter
|
143
|
+
teu
|
144
|
+
teus
|
145
|
+
ti
|
146
|
+
tido
|
147
|
+
tiña
|
148
|
+
tiven
|
149
|
+
túa
|
150
|
+
túas
|
151
|
+
un
|
152
|
+
unha
|
153
|
+
unhas
|
154
|
+
uns
|
155
|
+
vos
|
156
|
+
vosa
|
157
|
+
vosas
|
158
|
+
voso
|
159
|
+
vosos
|
160
|
+
vós
|