rere 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.classpath +260 -0
- data/.gitignore +28 -0
- data/.project +14 -0
- data/.ruby-version +1 -0
- data/.settings/org.eclim.prefs +3 -0
- data/.settings/org.eclipse.jdt.core.prefs +5 -0
- data/.settings/org.eclipse.jdt.ui.prefs +2 -0
- data/Gemfile +7 -0
- data/LICENSE +22 -0
- data/README.md +44 -0
- data/Rakefile +65 -0
- data/bin/solr +61 -0
- data/example/config/solr.yml +23 -0
- data/example/log/.gitkeep +0 -0
- data/example/solr/README.txt +63 -0
- data/example/solr/collection1/README.txt +50 -0
- data/example/solr/collection1/conf/admin-extra.html +24 -0
- data/example/solr/collection1/conf/admin-extra.menu-bottom.html +25 -0
- data/example/solr/collection1/conf/admin-extra.menu-top.html +25 -0
- data/example/solr/collection1/conf/currency.xml +67 -0
- data/example/solr/collection1/conf/elevate.xml +38 -0
- data/example/solr/collection1/conf/lang/contractions_ca.txt +8 -0
- data/example/solr/collection1/conf/lang/contractions_fr.txt +15 -0
- data/example/solr/collection1/conf/lang/contractions_ga.txt +5 -0
- data/example/solr/collection1/conf/lang/contractions_it.txt +23 -0
- data/example/solr/collection1/conf/lang/hyphenations_ga.txt +5 -0
- data/example/solr/collection1/conf/lang/stemdict_nl.txt +6 -0
- data/example/solr/collection1/conf/lang/stoptags_ja.txt +420 -0
- data/example/solr/collection1/conf/lang/stopwords_ar.txt +125 -0
- data/example/solr/collection1/conf/lang/stopwords_bg.txt +193 -0
- data/example/solr/collection1/conf/lang/stopwords_ca.txt +220 -0
- data/example/solr/collection1/conf/lang/stopwords_cz.txt +172 -0
- data/example/solr/collection1/conf/lang/stopwords_da.txt +108 -0
- data/example/solr/collection1/conf/lang/stopwords_de.txt +292 -0
- data/example/solr/collection1/conf/lang/stopwords_el.txt +78 -0
- data/example/solr/collection1/conf/lang/stopwords_en.txt +54 -0
- data/example/solr/collection1/conf/lang/stopwords_es.txt +354 -0
- data/example/solr/collection1/conf/lang/stopwords_eu.txt +99 -0
- data/example/solr/collection1/conf/lang/stopwords_fa.txt +313 -0
- data/example/solr/collection1/conf/lang/stopwords_fi.txt +95 -0
- data/example/solr/collection1/conf/lang/stopwords_fr.txt +184 -0
- data/example/solr/collection1/conf/lang/stopwords_ga.txt +110 -0
- data/example/solr/collection1/conf/lang/stopwords_gl.txt +161 -0
- data/example/solr/collection1/conf/lang/stopwords_hi.txt +235 -0
- data/example/solr/collection1/conf/lang/stopwords_hu.txt +209 -0
- data/example/solr/collection1/conf/lang/stopwords_hy.txt +46 -0
- data/example/solr/collection1/conf/lang/stopwords_id.txt +359 -0
- data/example/solr/collection1/conf/lang/stopwords_it.txt +301 -0
- data/example/solr/collection1/conf/lang/stopwords_ja.txt +127 -0
- data/example/solr/collection1/conf/lang/stopwords_lv.txt +172 -0
- data/example/solr/collection1/conf/lang/stopwords_nl.txt +117 -0
- data/example/solr/collection1/conf/lang/stopwords_no.txt +192 -0
- data/example/solr/collection1/conf/lang/stopwords_pt.txt +251 -0
- data/example/solr/collection1/conf/lang/stopwords_ro.txt +233 -0
- data/example/solr/collection1/conf/lang/stopwords_ru.txt +241 -0
- data/example/solr/collection1/conf/lang/stopwords_sv.txt +131 -0
- data/example/solr/collection1/conf/lang/stopwords_th.txt +119 -0
- data/example/solr/collection1/conf/lang/stopwords_tr.txt +212 -0
- data/example/solr/collection1/conf/lang/userdict_ja.txt +29 -0
- data/example/solr/collection1/conf/mapping-FoldToASCII.txt +3813 -0
- data/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt +246 -0
- data/example/solr/collection1/conf/protwords.txt +21 -0
- data/example/solr/collection1/conf/schema.xml +1125 -0
- data/example/solr/collection1/conf/scripts.conf +24 -0
- data/example/solr/collection1/conf/solrconfig.xml +1816 -0
- data/example/solr/collection1/conf/spellings.txt +2 -0
- data/example/solr/collection1/conf/stopwords.txt +14 -0
- data/example/solr/collection1/conf/synonyms.txt +29 -0
- data/example/solr/collection1/conf/update-script.js +53 -0
- data/example/solr/collection1/conf/velocity/VM_global_library.vm +170 -0
- data/example/solr/collection1/conf/velocity/browse.vm +50 -0
- data/example/solr/collection1/conf/velocity/cluster.vm +9 -0
- data/example/solr/collection1/conf/velocity/clusterResults.vm +12 -0
- data/example/solr/collection1/conf/velocity/debug.vm +17 -0
- data/example/solr/collection1/conf/velocity/did_you_mean.vm +4 -0
- data/example/solr/collection1/conf/velocity/facet_fields.vm +15 -0
- data/example/solr/collection1/conf/velocity/facet_pivot.vm +3 -0
- data/example/solr/collection1/conf/velocity/facet_queries.vm +3 -0
- data/example/solr/collection1/conf/velocity/facet_ranges.vm +15 -0
- data/example/solr/collection1/conf/velocity/facets.vm +5 -0
- data/example/solr/collection1/conf/velocity/footer.vm +17 -0
- data/example/solr/collection1/conf/velocity/head.vm +32 -0
- data/example/solr/collection1/conf/velocity/header.vm +3 -0
- data/example/solr/collection1/conf/velocity/hit.vm +11 -0
- data/example/solr/collection1/conf/velocity/hitGrouped.vm +24 -0
- data/example/solr/collection1/conf/velocity/join-doc.vm +4 -0
- data/example/solr/collection1/conf/velocity/jquery.autocomplete.css +48 -0
- data/example/solr/collection1/conf/velocity/jquery.autocomplete.js +763 -0
- data/example/solr/collection1/conf/velocity/layout.vm +20 -0
- data/example/solr/collection1/conf/velocity/main.css +208 -0
- data/example/solr/collection1/conf/velocity/product-doc.vm +27 -0
- data/example/solr/collection1/conf/velocity/query.vm +42 -0
- data/example/solr/collection1/conf/velocity/queryGroup.vm +19 -0
- data/example/solr/collection1/conf/velocity/querySpatial.vm +40 -0
- data/example/solr/collection1/conf/velocity/richtext-doc.vm +114 -0
- data/example/solr/collection1/conf/velocity/suggest.vm +3 -0
- data/example/solr/collection1/conf/velocity/tabs.vm +6 -0
- data/example/solr/collection1/conf/xslt/example.xsl +132 -0
- data/example/solr/collection1/conf/xslt/example_atom.xsl +67 -0
- data/example/solr/collection1/conf/xslt/example_rss.xsl +66 -0
- data/example/solr/collection1/conf/xslt/luke.xsl +337 -0
- data/example/solr/collection1/conf/xslt/updateXml.xsl +70 -0
- data/example/solr/collection1/data/index/segments.gen +0 -0
- data/example/solr/collection1/data/index/segments_1 +0 -0
- data/example/solr/data/development/index/segments.gen +0 -0
- data/example/solr/data/development/index/segments_1 +0 -0
- data/example/solr/solr.xml +53 -0
- data/example/solr/zoo.cfg +17 -0
- data/lib/rere.rb +66 -0
- data/lib/rere/railtie.rb +8 -0
- data/lib/rere/server.rb +380 -0
- data/lib/rere/tasks/solr.rake +47 -0
- data/lib/rere/version.rb +3 -0
- data/pom.xml +168 -0
- data/rere.gemspec +26 -0
- data/server/README.txt +78 -0
- data/server/cloud-scripts/zkcli.bat +12 -0
- data/server/cloud-scripts/zkcli.sh +14 -0
- data/server/contexts/solr-jetty-context.xml +8 -0
- data/server/etc/create-solrtest.keystore.sh +37 -0
- data/server/etc/jetty.xml +205 -0
- data/server/etc/logging.properties +38 -0
- data/server/etc/solrtest.keystore +0 -0
- data/server/etc/webdefault.xml +527 -0
- data/server/exampledocs/books.csv +11 -0
- data/server/exampledocs/books.json +51 -0
- data/server/exampledocs/gb18030-example.xml +32 -0
- data/server/exampledocs/hd.xml +56 -0
- data/server/exampledocs/ipod_other.xml +60 -0
- data/server/exampledocs/ipod_video.xml +40 -0
- data/server/exampledocs/manufacturers.xml +75 -0
- data/server/exampledocs/mem.xml +77 -0
- data/server/exampledocs/money.xml +65 -0
- data/server/exampledocs/monitor.xml +35 -0
- data/server/exampledocs/monitor2.xml +34 -0
- data/server/exampledocs/mp500.xml +43 -0
- data/server/exampledocs/post.jar +0 -0
- data/server/exampledocs/post.sh +30 -0
- data/server/exampledocs/sd500.xml +38 -0
- data/server/exampledocs/solr.xml +38 -0
- data/server/exampledocs/test_utf8.sh +93 -0
- data/server/exampledocs/utf8-example.xml +42 -0
- data/server/exampledocs/vidcard.xml +62 -0
- data/server/lib/ext/jcl-over-slf4j-1.6.6.jar +0 -0
- data/server/lib/ext/jul-to-slf4j-1.6.6.jar +0 -0
- data/server/lib/ext/log4j-1.2.16.jar +0 -0
- data/server/lib/ext/slf4j-api-1.6.6.jar +0 -0
- data/server/lib/ext/slf4j-log4j12-1.6.6.jar +0 -0
- data/server/lib/jetty-continuation-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-deploy-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-http-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-io-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-jmx-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-security-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-server-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-servlet-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-util-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-webapp-8.1.8.v20121106.jar +0 -0
- data/server/lib/jetty-xml-8.1.8.v20121106.jar +0 -0
- data/server/lib/servlet-api-3.0.jar +0 -0
- data/server/resources/log4j.properties +19 -0
- data/server/solr/README.txt +63 -0
- data/server/solr/solr.xml +53 -0
- data/server/solr/zoo.cfg +17 -0
- data/server/start.jar +0 -0
- data/server/webapps/solr.war +0 -0
- data/solr/lib/solr-winds-0.1.jar +0 -0
- metadata +284 -0
@@ -0,0 +1,117 @@
|
|
1
|
+
| From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
|
2
|
+
| This file is distributed under the BSD License.
|
3
|
+
| See http://snowball.tartarus.org/license.php
|
4
|
+
| Also see http://www.opensource.org/licenses/bsd-license.html
|
5
|
+
| - Encoding was converted to UTF-8.
|
6
|
+
| - This notice was added.
|
7
|
+
|
8
|
+
| A Dutch stop word list. Comments begin with vertical bar. Each stop
|
9
|
+
| word is at the start of a line.
|
10
|
+
|
11
|
+
| This is a ranked list (commonest to rarest) of stopwords derived from
|
12
|
+
| a large sample of Dutch text.
|
13
|
+
|
14
|
+
| Dutch stop words frequently exhibit homonym clashes. These are indicated
|
15
|
+
| clearly below.
|
16
|
+
|
17
|
+
de | the
|
18
|
+
en | and
|
19
|
+
van | of, from
|
20
|
+
ik | I, the ego
|
21
|
+
te | (1) chez, at etc, (2) to, (3) too
|
22
|
+
dat | that, which
|
23
|
+
die | that, those, who, which
|
24
|
+
in | in, inside
|
25
|
+
een | a, an, one
|
26
|
+
hij | he
|
27
|
+
het | the, it
|
28
|
+
niet | not, nothing, naught
|
29
|
+
zijn | (1) to be, being, (2) his, one's, its
|
30
|
+
is | is
|
31
|
+
was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
|
32
|
+
op | on, upon, at, in, up, used up
|
33
|
+
aan | on, upon, to (as dative)
|
34
|
+
met | with, by
|
35
|
+
als | like, such as, when
|
36
|
+
voor | (1) before, in front of, (2) furrow
|
37
|
+
had | had, past tense all persons sing. of 'hebben' (have)
|
38
|
+
er | there
|
39
|
+
maar | but, only
|
40
|
+
om | round, about, for etc
|
41
|
+
hem | him
|
42
|
+
dan | then
|
43
|
+
zou | should/would, past tense all persons sing. of 'zullen'
|
44
|
+
of | or, whether, if
|
45
|
+
wat | what, something, anything
|
46
|
+
mijn | possessive and noun 'mine'
|
47
|
+
men | people, 'one'
|
48
|
+
dit | this
|
49
|
+
zo | so, thus, in this way
|
50
|
+
door | through, by
|
51
|
+
over | over, across
|
52
|
+
ze | she, her, they, them
|
53
|
+
zich | oneself
|
54
|
+
bij | (1) a bee, (2) by, near, at
|
55
|
+
ook | also, too
|
56
|
+
tot | till, until
|
57
|
+
je | you
|
58
|
+
mij | me
|
59
|
+
uit | out of, from
|
60
|
+
der | Old Dutch form of 'van der' still found in surnames
|
61
|
+
daar | (1) there, (2) because
|
62
|
+
haar | (1) her, their, them, (2) hair
|
63
|
+
naar | (1) unpleasant, unwell etc, (2) towards, (3) as
|
64
|
+
heb | present first person sing. of 'to have'
|
65
|
+
hoe | how, why
|
66
|
+
heeft | present third person sing. of 'to have'
|
67
|
+
hebben | 'to have' and various parts thereof
|
68
|
+
deze | this
|
69
|
+
u | you
|
70
|
+
want | (1) for, (2) mitten, (3) rigging
|
71
|
+
nog | yet, still
|
72
|
+
zal | 'shall', first and third person sing. of verb 'zullen' (will)
|
73
|
+
me | me
|
74
|
+
zij | she, they
|
75
|
+
nu | now
|
76
|
+
ge | 'thou', still used in Belgium and south Netherlands
|
77
|
+
geen | none
|
78
|
+
omdat | because
|
79
|
+
iets | something, somewhat
|
80
|
+
worden | to become, grow, get
|
81
|
+
toch | yet, still
|
82
|
+
al | all, every, each
|
83
|
+
waren | (1) 'were', (2) to wander, (3) wares
|
84
|
+
veel | much, many
|
85
|
+
meer | (1) more, (2) lake
|
86
|
+
doen | to do, to make
|
87
|
+
toen | then, when
|
88
|
+
moet | noun 'spot/mote' and present form of 'to must'
|
89
|
+
ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
|
90
|
+
zonder | without
|
91
|
+
kan | noun 'can' and present form of 'to be able'
|
92
|
+
hun | their, them
|
93
|
+
dus | so, consequently
|
94
|
+
alles | all, everything, anything
|
95
|
+
onder | under, beneath
|
96
|
+
ja | yes, of course
|
97
|
+
eens | once, one day
|
98
|
+
hier | here
|
99
|
+
wie | who
|
100
|
+
werd | imperfect third person sing. of 'become'
|
101
|
+
altijd | always
|
102
|
+
doch | yet, but etc
|
103
|
+
wordt | present third person sing. of 'become'
|
104
|
+
wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
|
105
|
+
kunnen | to be able
|
106
|
+
ons | us/our
|
107
|
+
zelf | self
|
108
|
+
tegen | against, towards, at
|
109
|
+
na | after, near
|
110
|
+
reeds | already
|
111
|
+
wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
|
112
|
+
kon | could; past tense of 'to be able'
|
113
|
+
niets | nothing
|
114
|
+
uw | your
|
115
|
+
iemand | somebody
|
116
|
+
geweest | been; past participle of 'be'
|
117
|
+
andere | other
|
@@ -0,0 +1,192 @@
|
|
1
|
+
| From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
|
2
|
+
| This file is distributed under the BSD License.
|
3
|
+
| See http://snowball.tartarus.org/license.php
|
4
|
+
| Also see http://www.opensource.org/licenses/bsd-license.html
|
5
|
+
| - Encoding was converted to UTF-8.
|
6
|
+
| - This notice was added.
|
7
|
+
|
8
|
+
| A Norwegian stop word list. Comments begin with vertical bar. Each stop
|
9
|
+
| word is at the start of a line.
|
10
|
+
|
11
|
+
| This stop word list is for the dominant bokmål dialect. Words unique
|
12
|
+
| to nynorsk are marked *.
|
13
|
+
|
14
|
+
| Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
|
15
|
+
|
16
|
+
og | and
|
17
|
+
i | in
|
18
|
+
jeg | I
|
19
|
+
det | it/this/that
|
20
|
+
at | to (w. inf.)
|
21
|
+
en | a/an
|
22
|
+
et | a/an
|
23
|
+
den | it/this/that
|
24
|
+
til | to
|
25
|
+
er | is/am/are
|
26
|
+
som | who/that
|
27
|
+
på | on
|
28
|
+
de | they / you(formal)
|
29
|
+
med | with
|
30
|
+
han | he
|
31
|
+
av | of
|
32
|
+
ikke | not
|
33
|
+
ikkje | not *
|
34
|
+
der | there
|
35
|
+
så | so
|
36
|
+
var | was/were
|
37
|
+
meg | me
|
38
|
+
seg | oneself/himself/herself (reflexive)
|
39
|
+
men | but
|
40
|
+
ett | one
|
41
|
+
har | have
|
42
|
+
om | about
|
43
|
+
vi | we
|
44
|
+
min | my
|
45
|
+
mitt | my
|
46
|
+
ha | have
|
47
|
+
hadde | had
|
48
|
+
hun | she
|
49
|
+
nå | now
|
50
|
+
over | over
|
51
|
+
da | when/as
|
52
|
+
ved | by/know
|
53
|
+
fra | from
|
54
|
+
du | you
|
55
|
+
ut | out
|
56
|
+
sin | hers/his
|
57
|
+
dem | them
|
58
|
+
oss | us
|
59
|
+
opp | up
|
60
|
+
man | you/one
|
61
|
+
kan | can
|
62
|
+
hans | his
|
63
|
+
hvor | where
|
64
|
+
eller | or
|
65
|
+
hva | what
|
66
|
+
skal | shall/must
|
67
|
+
selv | self (reflexive)
|
68
|
+
sjøl | self (reflexive)
|
69
|
+
her | here
|
70
|
+
alle | all
|
71
|
+
vil | will
|
72
|
+
bli | become
|
73
|
+
ble | became
|
74
|
+
blei | became *
|
75
|
+
blitt | have become
|
76
|
+
kunne | could
|
77
|
+
inn | in
|
78
|
+
når | when
|
79
|
+
være | be
|
80
|
+
kom | come
|
81
|
+
noen | some
|
82
|
+
noe | some
|
83
|
+
ville | would
|
84
|
+
dere | you
|
85
|
+
som | who/which/that
|
86
|
+
deres | their/theirs
|
87
|
+
kun | only/just
|
88
|
+
ja | yes
|
89
|
+
etter | after
|
90
|
+
ned | down
|
91
|
+
skulle | should
|
92
|
+
denne | this
|
93
|
+
for | for/because
|
94
|
+
deg | you
|
95
|
+
si | hers/his
|
96
|
+
sine | hers/his
|
97
|
+
sitt | hers/his
|
98
|
+
mot | against
|
99
|
+
å | to
|
100
|
+
meget | much
|
101
|
+
hvorfor | why
|
102
|
+
dette | this
|
103
|
+
disse | these/those
|
104
|
+
uten | without
|
105
|
+
hvordan | how
|
106
|
+
ingen | none
|
107
|
+
din | your
|
108
|
+
ditt | your
|
109
|
+
blir | become
|
110
|
+
samme | same
|
111
|
+
hvilken | which
|
112
|
+
hvilke | which (plural)
|
113
|
+
sånn | such a
|
114
|
+
inni | inside/within
|
115
|
+
mellom | between
|
116
|
+
vår | our
|
117
|
+
hver | each
|
118
|
+
hvem | who
|
119
|
+
vors | us/ours
|
120
|
+
hvis | whose
|
121
|
+
både | both
|
122
|
+
bare | only/just
|
123
|
+
enn | than
|
124
|
+
fordi | as/because
|
125
|
+
før | before
|
126
|
+
mange | many
|
127
|
+
også | also
|
128
|
+
slik | just
|
129
|
+
vært | been
|
130
|
+
være | to be
|
131
|
+
båe | both *
|
132
|
+
begge | both
|
133
|
+
siden | since
|
134
|
+
dykk | your *
|
135
|
+
dykkar | yours *
|
136
|
+
dei | they *
|
137
|
+
deira | them *
|
138
|
+
deires | theirs *
|
139
|
+
deim | them *
|
140
|
+
di | your (fem.) *
|
141
|
+
då | as/when *
|
142
|
+
eg | I *
|
143
|
+
ein | a/an *
|
144
|
+
eit | a/an *
|
145
|
+
eitt | a/an *
|
146
|
+
elles | or *
|
147
|
+
honom | he *
|
148
|
+
hjå | at *
|
149
|
+
ho | she *
|
150
|
+
hoe | she *
|
151
|
+
henne | her
|
152
|
+
hennar | her/hers
|
153
|
+
hennes | hers
|
154
|
+
hoss | how *
|
155
|
+
hossen | how *
|
156
|
+
ikkje | not *
|
157
|
+
ingi | no one *
|
158
|
+
inkje | no one *
|
159
|
+
korleis | how *
|
160
|
+
korso | how *
|
161
|
+
kva | what/which *
|
162
|
+
kvar | where *
|
163
|
+
kvarhelst | where *
|
164
|
+
kven | who/whom *
|
165
|
+
kvi | why *
|
166
|
+
kvifor | why *
|
167
|
+
me | we *
|
168
|
+
medan | while *
|
169
|
+
mi | my *
|
170
|
+
mine | my *
|
171
|
+
mykje | much *
|
172
|
+
no | now *
|
173
|
+
nokon | some (masc./neut.) *
|
174
|
+
noka | some (fem.) *
|
175
|
+
nokor | some *
|
176
|
+
noko | some *
|
177
|
+
nokre | some *
|
178
|
+
si | his/hers *
|
179
|
+
sia | since *
|
180
|
+
sidan | since *
|
181
|
+
so | so *
|
182
|
+
somt | some *
|
183
|
+
somme | some *
|
184
|
+
um | about *
|
185
|
+
upp | up *
|
186
|
+
vere | be *
|
187
|
+
vore | was *
|
188
|
+
verte | become *
|
189
|
+
vort | become *
|
190
|
+
varte | became *
|
191
|
+
vart | became *
|
192
|
+
|
@@ -0,0 +1,251 @@
|
|
1
|
+
| From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
|
2
|
+
| This file is distributed under the BSD License.
|
3
|
+
| See http://snowball.tartarus.org/license.php
|
4
|
+
| Also see http://www.opensource.org/licenses/bsd-license.html
|
5
|
+
| - Encoding was converted to UTF-8.
|
6
|
+
| - This notice was added.
|
7
|
+
|
8
|
+
| A Portuguese stop word list. Comments begin with vertical bar. Each stop
|
9
|
+
| word is at the start of a line.
|
10
|
+
|
11
|
+
|
12
|
+
| The following is a ranked list (commonest to rarest) of stopwords
|
13
|
+
| deriving from a large sample of text.
|
14
|
+
|
15
|
+
| Extra words have been added at the end.
|
16
|
+
|
17
|
+
de | of, from
|
18
|
+
a | the; to, at; her
|
19
|
+
o | the; him
|
20
|
+
que | who, that
|
21
|
+
e | and
|
22
|
+
do | de + o
|
23
|
+
da | de + a
|
24
|
+
em | in
|
25
|
+
um | a
|
26
|
+
para | for
|
27
|
+
| é from SER
|
28
|
+
com | with
|
29
|
+
não | not, no
|
30
|
+
uma | a
|
31
|
+
os | the; them
|
32
|
+
no | em + o
|
33
|
+
se | himself etc
|
34
|
+
na | em + a
|
35
|
+
por | for
|
36
|
+
mais | more
|
37
|
+
as | the; them
|
38
|
+
dos | de + os
|
39
|
+
como | as, like
|
40
|
+
mas | but
|
41
|
+
| foi from SER
|
42
|
+
ao | a + o
|
43
|
+
ele | he
|
44
|
+
das | de + as
|
45
|
+
| tem from TER
|
46
|
+
à | a + a
|
47
|
+
seu | his
|
48
|
+
sua | her
|
49
|
+
ou | or
|
50
|
+
| ser from SER
|
51
|
+
quando | when
|
52
|
+
muito | much
|
53
|
+
| há from HAV
|
54
|
+
nos | em + os; us
|
55
|
+
já | already, now
|
56
|
+
| está from EST
|
57
|
+
eu | I
|
58
|
+
também | also
|
59
|
+
só | only, just
|
60
|
+
pelo | per + o
|
61
|
+
pela | per + a
|
62
|
+
até | up to
|
63
|
+
isso | that
|
64
|
+
ela | she
|
65
|
+
entre | between
|
66
|
+
| era from SER
|
67
|
+
depois | after
|
68
|
+
sem | without
|
69
|
+
mesmo | same
|
70
|
+
aos | a + os
|
71
|
+
| ter from TER
|
72
|
+
seus | his
|
73
|
+
quem | whom
|
74
|
+
nas | em + as
|
75
|
+
me | me
|
76
|
+
esse | that
|
77
|
+
eles | they
|
78
|
+
| estão from EST
|
79
|
+
você | you
|
80
|
+
| tinha from TER
|
81
|
+
| foram from SER
|
82
|
+
essa | that
|
83
|
+
num | em + um
|
84
|
+
nem | nor
|
85
|
+
suas | her
|
86
|
+
meu | my
|
87
|
+
às | a + as
|
88
|
+
minha | my
|
89
|
+
| têm from TER
|
90
|
+
numa | em + uma
|
91
|
+
pelos | per + os
|
92
|
+
elas | they
|
93
|
+
| havia from HAV
|
94
|
+
| seja from SER
|
95
|
+
qual | which
|
96
|
+
| será from SER
|
97
|
+
nós | we
|
98
|
+
| tenho from TER
|
99
|
+
lhe | to him, her
|
100
|
+
deles | of them
|
101
|
+
essas | those
|
102
|
+
esses | those
|
103
|
+
pelas | per + as
|
104
|
+
este | this
|
105
|
+
| fosse from SER
|
106
|
+
dele | of him
|
107
|
+
|
108
|
+
| other words. There are many contractions such as naquele = em+aquele,
|
109
|
+
| mo = me+o, but they are rare.
|
110
|
+
| Indefinite article plural forms are also rare.
|
111
|
+
|
112
|
+
tu | thou
|
113
|
+
te | thee
|
114
|
+
vocês | you (plural)
|
115
|
+
vos | you
|
116
|
+
lhes | to them
|
117
|
+
meus | my
|
118
|
+
minhas
|
119
|
+
teu | thy
|
120
|
+
tua
|
121
|
+
teus
|
122
|
+
tuas
|
123
|
+
nosso | our
|
124
|
+
nossa
|
125
|
+
nossos
|
126
|
+
nossas
|
127
|
+
|
128
|
+
dela | of her
|
129
|
+
delas | of them
|
130
|
+
|
131
|
+
esta | this
|
132
|
+
estes | these
|
133
|
+
estas | these
|
134
|
+
aquele | that
|
135
|
+
aquela | that
|
136
|
+
aqueles | those
|
137
|
+
aquelas | those
|
138
|
+
isto | this
|
139
|
+
aquilo | that
|
140
|
+
|
141
|
+
| forms of estar, to be (not including the infinitive):
|
142
|
+
estou
|
143
|
+
está
|
144
|
+
estamos
|
145
|
+
estão
|
146
|
+
estive
|
147
|
+
esteve
|
148
|
+
estivemos
|
149
|
+
estiveram
|
150
|
+
estava
|
151
|
+
estávamos
|
152
|
+
estavam
|
153
|
+
estivera
|
154
|
+
estivéramos
|
155
|
+
esteja
|
156
|
+
estejamos
|
157
|
+
estejam
|
158
|
+
estivesse
|
159
|
+
estivéssemos
|
160
|
+
estivessem
|
161
|
+
estiver
|
162
|
+
estivermos
|
163
|
+
estiverem
|
164
|
+
|
165
|
+
| forms of haver, to have (not including the infinitive):
|
166
|
+
hei
|
167
|
+
há
|
168
|
+
havemos
|
169
|
+
hão
|
170
|
+
houve
|
171
|
+
houvemos
|
172
|
+
houveram
|
173
|
+
houvera
|
174
|
+
houvéramos
|
175
|
+
haja
|
176
|
+
hajamos
|
177
|
+
hajam
|
178
|
+
houvesse
|
179
|
+
houvéssemos
|
180
|
+
houvessem
|
181
|
+
houver
|
182
|
+
houvermos
|
183
|
+
houverem
|
184
|
+
houverei
|
185
|
+
houverá
|
186
|
+
houveremos
|
187
|
+
houverão
|
188
|
+
houveria
|
189
|
+
houveríamos
|
190
|
+
houveriam
|
191
|
+
|
192
|
+
| forms of ser, to be (not including the infinitive):
|
193
|
+
sou
|
194
|
+
somos
|
195
|
+
são
|
196
|
+
era
|
197
|
+
éramos
|
198
|
+
eram
|
199
|
+
fui
|
200
|
+
foi
|
201
|
+
fomos
|
202
|
+
foram
|
203
|
+
fora
|
204
|
+
fôramos
|
205
|
+
seja
|
206
|
+
sejamos
|
207
|
+
sejam
|
208
|
+
fosse
|
209
|
+
fôssemos
|
210
|
+
fossem
|
211
|
+
for
|
212
|
+
formos
|
213
|
+
forem
|
214
|
+
serei
|
215
|
+
será
|
216
|
+
seremos
|
217
|
+
serão
|
218
|
+
seria
|
219
|
+
seríamos
|
220
|
+
seriam
|
221
|
+
|
222
|
+
| forms of ter, to have (not including the infinitive):
|
223
|
+
tenho
|
224
|
+
tem
|
225
|
+
temos
|
226
|
+
têm
|
227
|
+
tinha
|
228
|
+
tínhamos
|
229
|
+
tinham
|
230
|
+
tive
|
231
|
+
teve
|
232
|
+
tivemos
|
233
|
+
tiveram
|
234
|
+
tivera
|
235
|
+
tivéramos
|
236
|
+
tenha
|
237
|
+
tenhamos
|
238
|
+
tenham
|
239
|
+
tivesse
|
240
|
+
tivéssemos
|
241
|
+
tivessem
|
242
|
+
tiver
|
243
|
+
tivermos
|
244
|
+
tiverem
|
245
|
+
terei
|
246
|
+
terá
|
247
|
+
teremos
|
248
|
+
terão
|
249
|
+
teria
|
250
|
+
teríamos
|
251
|
+
teriam
|