vidibus-words 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.bundle/config ADDED
@@ -0,0 +1,2 @@
1
+ ---
2
+ BUNDLE_DISABLE_SHARED_GEMS: "1"
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --colour
2
+ --format nested
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ source :rubygems
2
+
3
+ gem "rails", "~> 3.0.0"
4
+ gem "vidibus-core_extensions"
5
+
6
+ # Development dependencies
7
+ gem "jeweler"
8
+ gem "rake"
9
+ gem "rspec", "~> 2.0.0.beta.20"
10
+ gem "rr"
11
+ gem "relevance-rcov"
data/Gemfile.lock ADDED
@@ -0,0 +1,101 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ abstract (1.0.0)
5
+ actionmailer (3.0.1)
6
+ actionpack (= 3.0.1)
7
+ mail (~> 2.2.5)
8
+ actionpack (3.0.1)
9
+ activemodel (= 3.0.1)
10
+ activesupport (= 3.0.1)
11
+ builder (~> 2.1.2)
12
+ erubis (~> 2.6.6)
13
+ i18n (~> 0.4.1)
14
+ rack (~> 1.2.1)
15
+ rack-mount (~> 0.6.12)
16
+ rack-test (~> 0.5.4)
17
+ tzinfo (~> 0.3.23)
18
+ activemodel (3.0.1)
19
+ activesupport (= 3.0.1)
20
+ builder (~> 2.1.2)
21
+ i18n (~> 0.4.1)
22
+ activerecord (3.0.1)
23
+ activemodel (= 3.0.1)
24
+ activesupport (= 3.0.1)
25
+ arel (~> 1.0.0)
26
+ tzinfo (~> 0.3.23)
27
+ activeresource (3.0.1)
28
+ activemodel (= 3.0.1)
29
+ activesupport (= 3.0.1)
30
+ activesupport (3.0.1)
31
+ arel (1.0.1)
32
+ activesupport (~> 3.0.0)
33
+ builder (2.1.2)
34
+ diff-lcs (1.1.2)
35
+ erubis (2.6.6)
36
+ abstract (>= 1.0.0)
37
+ gemcutter (0.6.1)
38
+ git (1.2.5)
39
+ i18n (0.4.2)
40
+ jeweler (1.4.0)
41
+ gemcutter (>= 0.1.0)
42
+ git (>= 1.2.5)
43
+ rubyforge (>= 2.0.0)
44
+ json_pure (1.4.6)
45
+ mail (2.2.9)
46
+ activesupport (>= 2.3.6)
47
+ i18n (~> 0.4.1)
48
+ mime-types (~> 1.16)
49
+ treetop (~> 1.4.8)
50
+ mime-types (1.16)
51
+ polyglot (0.3.1)
52
+ rack (1.2.1)
53
+ rack-mount (0.6.13)
54
+ rack (>= 1.0.0)
55
+ rack-test (0.5.6)
56
+ rack (>= 1.0)
57
+ rails (3.0.1)
58
+ actionmailer (= 3.0.1)
59
+ actionpack (= 3.0.1)
60
+ activerecord (= 3.0.1)
61
+ activeresource (= 3.0.1)
62
+ activesupport (= 3.0.1)
63
+ bundler (~> 1.0.0)
64
+ railties (= 3.0.1)
65
+ railties (3.0.1)
66
+ actionpack (= 3.0.1)
67
+ activesupport (= 3.0.1)
68
+ rake (>= 0.8.4)
69
+ thor (~> 0.14.0)
70
+ rake (0.8.7)
71
+ relevance-rcov (0.9.2.1)
72
+ rr (1.0.2)
73
+ rspec (2.0.1)
74
+ rspec-core (~> 2.0.1)
75
+ rspec-expectations (~> 2.0.1)
76
+ rspec-mocks (~> 2.0.1)
77
+ rspec-core (2.0.1)
78
+ rspec-expectations (2.0.1)
79
+ diff-lcs (>= 1.1.2)
80
+ rspec-mocks (2.0.1)
81
+ rspec-core (~> 2.0.1)
82
+ rspec-expectations (~> 2.0.1)
83
+ rubyforge (2.0.4)
84
+ json_pure (>= 1.1.7)
85
+ thor (0.14.4)
86
+ treetop (1.4.8)
87
+ polyglot (>= 0.3.1)
88
+ tzinfo (0.3.23)
89
+ vidibus-core_extensions (0.3.11)
90
+
91
+ PLATFORMS
92
+ ruby
93
+
94
+ DEPENDENCIES
95
+ jeweler
96
+ rails (~> 3.0.0)
97
+ rake
98
+ relevance-rcov
99
+ rr
100
+ rspec (~> 2.0.0.beta.20)
101
+ vidibus-core_extensions
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Andre Pankratz
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,24 @@
1
+ = vidibus-words
2
+
3
+ This gem provides handling of words. It ships with a list of stop words and allows extraction of keywords from a string.
4
+
5
+ This gem is part of the open source SOA framework Vidibus: http://vidibus.org
6
+
7
+
8
+ == Installation
9
+
10
+ Add the dependency to the Gemfile of your application:
11
+
12
+ gem "vidibus-words"
13
+
14
+ Then call bundle install on your console.
15
+
16
+
17
+ == Usage
18
+
19
+ TODO: describe
20
+
21
+
22
+ == Copyright
23
+
24
+ Copyright (c) 2010 Andre Pankratz. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,37 @@
# Build tasks for the vidibus-words gem: packaging/release (Jeweler),
# coverage (RSpec + rcov) and API documentation (RDoc).
require "rubygems"
require "rake"
require "rake/rdoctask"
require "rspec"
require "rspec/core/rake_task"

# Jeweler is only needed for packaging and releasing, so a missing gem
# must not prevent the remaining tasks from loading.
begin
  require "jeweler"
  Jeweler::Tasks.new do |gem|
    gem.name = "vidibus-words"
    gem.summary = %Q{Tools for handling of words.}
    gem.description = %Q{Contains stop words lists and methods to extract keywords from strings.}
    gem.email = "andre@vidibus.com"
    gem.homepage = "http://github.com/vidibus/vidibus-words"
    gem.authors = ["Andre Pankratz"]
    gem.add_dependency "rails", "~> 3.0.0"
    gem.add_dependency "vidibus-core_extensions"
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Runs all specs with rcov coverage, ignoring the specs themselves and
# any installed gems. Uses the RSpec constant, as spec/spec_helper.rb does.
RSpec::Core::RakeTask.new(:rcov) do |t|
  t.pattern = "spec/**/*_spec.rb"
  t.rcov = true
  t.rcov_opts = ["--exclude", "^spec,/gems/"]
end

# Generates RDoc for the README and everything under lib/.
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in the title.
  version = File.exist?("VERSION") ? File.read("VERSION").strip : ""
  rdoc.rdoc_dir = "rdoc"
  rdoc.title = "vidibus-words #{version}"
  rdoc.rdoc_files.include("README*")
  rdoc.rdoc_files.include("lib/**/*.rb")
  rdoc.options << "--charset=utf-8"
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,3 @@
1
+ de:
2
+ vidibus:
3
+ stopwords: [also, ab, aber, abgerufen, abgerufene, abgerufener, abgerufenes, acht, aehnlich, alle, allein, allem, allen, aller, allerdings, allerlei, alles, allg, allgemein, allmaehlich, allzu, als, alsbald, am, an, andere, anderem, anderen, anderer, andererseits, anderes, anderm, andern, andernfalls, anders, anerkannt, anerkannte, anerkannter, anerkanntes, anfangen, anfing, angefangen, angesetze, angesetzt, angesetzten, angesetzter, ansetzen, anstatt, arbeiten, auch, auf, aufgehoert, aufgrund, aufhoeren, aufhoerte, aufzusuchen, aus, ausdruecken, ausdrueckt, ausdrueckte, ausgenommen, aussen, ausser, ausserdem, ausserhalb, autor, background, bald, bearbeite, bearbeiten, bearbeitete, bearbeiteten, bedarf, beduerfen, bedurfte, been, befragen, befragte, befragten, befragter, begann, beginnen, begonnen, behalten, behielt, bei, beide, beiden, beiderlei, beides, beim, beinahe, beitragen, beitrugen, bekannt, bekannte, bekannter, bekennen, benutzt, bereits, berichten, berichtet, berichtete, berichteten, besonders, besser, bestehen, besteht, betraechtlich, bevor, bezueglich, bietet, bin, bis, bisher, bislang, bist, biz, bleiben, blieb, bloss, boeden, brachte, brachten, braeuchte, brauchen, braucht, bringen, bsp, bzw, ca, comment, da, dabei, dadurch, dafuer, dagegen, daher, dahin, damals, damit, danach, daneben, dank, danke, danken, dann, dannen, daran, darauf, daraus, darf, darfst, darin, darueber, darueberhinaus, darum, darunter, das, dass, dasselbe, davon, davor, dazu, dein, deine, deinem, deinen, deiner, deines, dem, demnach, demselben, den, denen, denn, dennoch, denselben, der, derart, derartig, derem, deren, derer, derjenige, derjenigen, derselbe, derselben, derzeit, des, deshalb, desselben, dessen, desto, deswegen, dich, die, diejenige, dies, diese, dieselbe, dieselben, diesem, diesen, dieser, dieses, diesseits, dinge, dir, direkt, direkte, direkten, direkter, doc, doch, doppelt, dort, dorther, dorthin, drauf, drei, dreissig, drin, dritte, drueber, drunter, du, 
duerfen, duerfte, dunklen, durch, durchaus, durfte, durften, eben, ebenfalls, ebenso, ehe, eher, eigenen, eigenes, eigentlich, ein, einbauen, eine, einem, einen, einer, einerseits, eines, einfach, einfuehren, einfuehrte, einfuehrten, eingesetzt, einig, einige, einigem, einigen, einiger, einigermassen, einiges, einmal, eins, einseitig, einseitige, einseitigen, einseitiger, einst, einstmals, einzig, elf, ende, entsprechend, entweder, er, ergaenze, ergaenzen, ergaenzte, ergaenzten, erhaelt, erhalten, erhielt, erhielten, erneut, eroeffne, eroeffnen, eroeffnet, eroeffnete, eroeffnetes, erst, erste, ersten, erster, es, etc, etliche, etwa, etwas, euch, euer, eure, eurem, euren, eurer, eures, fall, falls, fand, fast, ferner, finden, findest, findet, folgende, folgenden, folgender, folgendes, folglich, fordern, fordert, forderte, forderten, fortsetzen, fortsetzt, fortsetzte, fortsetzten, fragte, frei, freie, freies, fuenf, fuer, gab, gaengig, gaengige, gaengigen, gaengiger, gaengiges, gaenzlich, ganz, ganze, ganzem, ganzen, ganzer, ganzes, gar, gbr, geb, geben, geblieben, gebracht, gedurft, geehrt, geehrte, geehrten, geehrter, gefaelligst, gefaellt, gefallen, gefiel, gegeben, gegen, gehabt, gehen, geht, gekommen, gekonnt, gemacht, gemaess, gemocht, genommen, genug, gern, gesagt, gesehen, gestern, gestrige, getan, geteilt, geteilte, getragen, gewesen, gewissermassen, gewollt, geworden, ggf, gib, gibt, gleich, gleichwohl, gleichzeitig, gluecklicherweise, gmbh, gratulieren, gratuliert, gratulierte, gute, guten, hab, habe, haben, haett, haette, haetten, halb, hallo, hast, hat, hatte, hatten, hattest, hattet, heraus, herein, heute, heutige, hier, hiermit, hiesige, hin, hinein, hinten, hinter, hinterher, hoch, hoechstens, html, http, hundert, ich, igitt, ihm, ihn, ihnen, ihr, ihre, ihrem, ihren, ihrer, ihres, im, immer, immerhin, in, indem, indessen, info, infolge, innen, innerhalb, ins, insofern, inzwischen, irgend, irgendeine, irgendwas, irgendwen, irgendwer, irgendwie, 
irgendwo, ist, ja, jaehrig, jaehrige, jaehrigen, jaehriges, je, jede, jedem, jeden, jedenfalls, jeder, jederlei, jedes, jedoch, jedwede, jemand, jene, jenem, jenen, jener, jenes, jenseits, jetzt, kam, kann, kannst, kaum, kei, kein, keine, keinem, keinen, keiner, keinerlei, keines, keineswegs, klar, klare, klaren, klares, klein, kleinen, kleiner, kleines, koennen, koennt, koennte, koennten, komme, kommen, kommt, konkret, konkrete, konkreten, konkreter, konkretes, konnte, konnten, kuenftig, laengst, laengstens, lag, lagen, langsam, lassen, laut, lediglich, leer, legen, legte, legten, leicht, leider, lesen, letze, letzten, letztendlich, letztens, letztes, letztlich, lichten, liegt, liest, links, mache, machen, machst, macht, machte, machten, mag, magst, mal, man, manche, manchem, manchen, mancher, mancherorts, manches, manchmal, mann, margin, med, mehr, mehrere, mein, meine, meinem, meinen, meiner, meines, meist, meiste, meisten, meta, mich, mindestens, mir, mit, mithin, mochte, moechte, moechten, moechtest, moegen, moeglich, moegliche, moeglichen, moeglicher, moeglicherweise, morgen, morgige, muessen, muesst, muesste, muessten, muss, musst, musste, mussten, nach, nachdem, nacher, nachhinein, naechste, naemlich, nahm, natuerlich, neben, nebenan, nehmen, nein, nes, neu, neue, neuem, neuen, neuer, neues, neun, nicht, nichts, nie, niemals, niemand, nimm, nimmer, nimmt, nirgends, nirgendwo, noch, noetigenfalls, nuetzt, nun, nur, nutzen, nutzt, ob, oben, oberhalb, obgleich, obschon, obwohl, oder, oft, ohne, online, per, pfui, ploetzlich, pro, regelmaessig, rief, rund, saemtliche, sage, sagen, sagt, sagte, sagten, sagtest, sang, sangen, schaetzen, schaetzt, schaetzte, schaetzten, schlechter, schliesslich, schon, schreibe, schreiben, schreibens, schreiber, schwierig, sechs, sect, sehe, sehen, sehr, sehrwohl, seht, sei, seid, sein, seine, seinem, seinen, seiner, seines, seit, seitdem, seither, selber, selbst, senke, senken, senkt, senkte, senkten, setzen, setzt, setzte, 
setzten, sich, sicher, sicherlich, sie, sieben, siebte, siehe, sieht, sind, singen, singt, so, sobald, sodass, soeben, sofern, sofort, sog, sogar, solange, solch, solche, solchem, solchen, solcher, solches, soll, sollen, sollst, sollt, sollte, sollten, solltest, somit, sondern, sonst, sonstwo, sooft, soviel, soweit, sowie, sowohl, spaeter, spielen, startet, startete, starteten, statt, stattdessen, steht, steige, steigen, steigt, stets, stieg, stiegen, such, suchen, taet, tages, tat, tatsaechlich, tatsaechlichen, tatsaechlicher, tatsaechliches, tausend, teile, teilen, teilte, teilten, titel, total, traegt, trage, tragen, trotzdem, trug, tun, tust, tut, txt, uebel, ueber, ueberall, ueberallhin, ueberdies, uebermorgen, uebrig, uebrigens, um, umso, unbedingt, und, ungefaehr, unmoeglich, unmoegliche, unmoeglichen, unmoeglicher, unnoetig, uns, unser, unsere, unserem, unseren, unserer, unseres, unserm, unses, unten, unter, unterbrach, unterbrechen, unterhalb, unwichtig, usw, vergangen, vergangene, vergangener, vergangenes, vermag, vermoegen, vermutlich, viel, viele, vielen, vieler, vieles, vielleicht, vielmals, vier, voellig, vollstaendig, vom, von, vor, voran, vorbei, vorgestern, vorher, vorne, vorueber, waehrend, waehrenddessen, waer, waere, waeren, wann, war, waren, warst, was, weder, weg, wegen, weil, weiss, weiter, weitere, weiterem, weiteren, weiterer, weiteres, weiterhin, welche, welchem, welchen, welcher, welches, wem, wen, wenig, wenige, weniger, wenigstens, wenn, wenngleich, wer, werde, werden, werdet, wessen, wichtig, wie, wieder, wieso, wieviel, wiewohl, will, willst, wir, wird, wirklich, wirst, wo, wodurch, wogegen, woher, wohin, wohingegen, wohl, wohlweislich, wolle, wollen, wollt, wollte, wollten, wolltest, wolltet, womit, woraufhin, woraus, worin, wuerde, wuerden, wurde, wurden, zahlreich, zb, zehn, zeitweise, ziehen, zieht, zog, zogen, zu, zudem, zuerst, zufolge, zugleich, zuletzt, zum, zumal, zur, zurueck, zusammen, zuviel, zwanzig, zwar, zwei, zwischen, 
zwoelf]
@@ -0,0 +1,4 @@
1
+ en:
2
+ vidibus:
3
+ stopwords:
4
+ [a, able, about, above, across, after, afterwards, again, against, all, almost, alone, along, already, also, although, always, am, among, amongst, amoungst, amount, an, and, another, any, anyhow, anyone, anything, anyway, anywhere, are, around, as, at, back, be, became, because, become, becomes, becoming, been, before, beforehand, behind, being, below, beside, besides, between, beyond, bill, both, bottom, but, by, call, can, cannot, cant, co, computer, con, could, couldnt, cry, de, describe, detail, do, done, down, due, during, each, eg, eight, either, eleven, else, elsewhere, empty, enough, etc, even, ever, every, everyone, everything, everywhere, except, few, fifteen, fify, fill, find, fire, first, five, for, former, formerly, forty, found, four, from, front, full, further, get, give, go, had, has, hasnt, have, he, hence, her, here, hereafter, hereby, herein, hereupon, hers, him, his, how, however, hundred, i, ie, if, in, indeed, interest, into, is, it, its, keep, last, latter, latterly, least, less, ltd, made, many, may, me, meanwhile, might, mill, mine, more, moreover, most, mostly, move, much, must, my, name, namely, neither, never, nevertheless, next, nine, no, nobody, none, noone, nor, not, nothing, now, nowhere, of, off, often, on, once, one, only, onto, or, other, others, otherwise, our, ours, ourselves, out, over, own, part, per, perhaps, please, put, rather, re, same, see, seem, seemed, seeming, seems, serious, several, she, should, show, side, since, sincere, six, sixty, so, some, somehow, someone, something, sometime, sometimes, somewhere, still, such, system, take, ten, th, than, thank, thanks, thanx, that, the, their, them, themselves, then, thence, there, thereafter, thereby, therefore, therein, thereupon, these, they, thick, thin, third, this, those, though, three, through, throughout, thru, thus, to, together, too, top, toward, towards, twelve, twenty, two, un, under, until, up, upon, us, very, via, was, we, well, were, what, whatever, when, 
whence, whenever, where, whereafter, whereas, whereby, wherein, whereupon, wherever, whether, which, while, whither, who, whoever, whole, whom, whose, why, will, with, within, without, would, yet, you, your, yours, yourself, yourselves]
@@ -0,0 +1,4 @@
1
+ es:
2
+ vidibus:
3
+ stopwords:
4
+ [a, al, algo, algun, alguna, algunas, alguno, algunos, ambos, ampleamos, ante, antes, aquel, aquellas, aquellos, aqui, arriba, atras, bajo, bastante, bien, cada, cierta, ciertas, ciertos, como, con, conseguimos, conseguir, consigo, consigue, consiguen, consigues, contra, cual, cuando, de, del, dentro, desde, donde, dos, durante, e, el, ella, ellas, ellos, empleais, emplean, emplear, empleas, empleo, en, encima, entonces, entre, era, erais, eramos, eran, eras, eres, es, esa, esas, ese, eso, esos, esta, estaba, estabais, estabamos, estaban, estabas, estad, estada, estadas, estado, estados, estais, estamos, estan, estando, estar, estara, estaran, estaras, estare, estareis, estaremos, estaria, estariais, estariamos, estarian, estarias, estas, este, esteis, estemos, esten, estes, esto, estos, estoy, estuve, estuviera, estuvierais, estuvieramos, estuvieran, estuvieras, estuvieron, estuviese, estuvieseis, estuviesemos, estuviesen, estuvieses, estuvimos, estuviste, estuvisteis, estuvo, fin, fue, fuera, fuerais, fueramos, fueran, fueras, fueron, fuese, fueseis, fuesemos, fuesen, fueses, fui, fuimos, fuiste, fuisteis, gueno, ha, habeis, haber, habia, habiais, habiamos, habian, habias, habida, habidas, habido, habidos, habiendo, habra, habran, habras, habre, habreis, habremos, habria, habriais, habriamos, habrian, habrias, hace, haceis, hacemos, hacen, hacer, haces, hago, han, has, hasta, hay, haya, hayais, hayamos, hayan, hayas, he, hemos, hube, hubiera, hubierais, hubieramos, hubieran, hubieras, hubieron, hubiese, hubieseis, hubiesemos, hubiesen, hubieses, hubimos, hubiste, hubisteis, hubo, incluso, intenta, intentais, intentamos, intentan, intentar, intentas, intento, ir, la, largo, las, le, les, lo, los, mas, me, mi, mia, mias, mientras, mio, mios, mis, modo, mucho, muchos, muy, nada, ni, no, nos, nosotras, nosotros, nuestra, nuestras, nuestro, nuestros, o, os, otra, otras, otro, otros, para, pero, poco, podeis, podemos, poder, podria, podriais, podriamos, podrian, 
podrias, por, porque, primerodesde, puede, pueden, puedo, que, quien, quienes, sabe, sabeis, sabemos, saben, saber, sabes, se, sea, seais, seamos, sean, seas, ser, sera, seran, seras, sere, sereis, seremos, seria, seriais, seriamos, serian, serias, si, sido, siendo, sin, sobre, sois, solamente, solo, somos, son, soy, su, sus, suya, suyas, suyo, suyos, tambien, tanto, te, tendra, tendran, tendras, tendre, tendreis, tendremos, tendria, tendriais, tendriamos, tendrian, tendrias, tened, teneis, tenemos, tener, tenga, tengais, tengamos, tengan, tengas, tengo, tenia, teniais, teniamos, tenian, tenias, tenida, tenidas, tenido, tenidos, teniendo, ti, tiempo, tiene, tienen, tienes, todo, todos, trabaja, trabajais, trabajamos, trabajan, trabajar, trabajas, trabajo, tras, tu, tus, tuve, tuviera, tuvierais, tuvieramos, tuvieran, tuvieras, tuvieron, tuviese, tuvieseis, tuviesemos, tuviesen, tuvieses, tuvimos, tuviste, tuvisteis, tuvo, tuya, tuyas, tuyo, tuyos, ultimo, un, una, unas, uno, unos, usa, usais, usamos, usan, usar, usas, uso, va, vais, valor, vamos, van, vaya, verdad, verdaderacierto, verdadero, vosotras, vosotros, voy, vuestra, vuestras, vuestro, vuestros, y, ya, yo]
@@ -0,0 +1,101 @@
# encoding: utf-8
module Vidibus
  # Splits a text into words, ranks the words by usage and extracts
  # keywords by dropping the stop words of one or more locales.
  #
  #   words = Vidibus::Words.new("To tell a long story short", :en)
  #   words.to_a      # all words in original order and case
  #   words.sort      # downcased unique words, most used first
  #   words.keywords  # like #sort, without stop words, limited to 20
  class Words

    class MissingLocaleError < StandardError; end

    # The raw input string given to the constructor.
    attr_reader :input

    # input - the text to analyze.
    # loc   - a single locale or an Array of locales whose stop words
    #         should be removed by #keywords. Defaults to none, which
    #         makes .stopwords fall back to all available locales.
    def initialize(input, loc = [])
      @input = input
      self.locale = loc
    end

    # Sets locale(s) to be used. Accepts a single locale or an Array.
    def locale=(input)
      @locales = input.is_a?(Array) ? input : [input]
      # Keywords depend on the stop words of the current locales, so any
      # memoized result is stale from here on.
      @keywords = nil
    end

    # Returns the locales as an Array.
    def locales
      @locales || []
    end

    # Returns the words of the input string in original order and case.
    def list
      @list ||= Vidibus::Words.words(input)
    end
    alias_method :to_a, :list

    # Returns downcased unique words ordered by usage.
    def sort
      @sort ||= Vidibus::Words.sort_by_occurrence(list)
    end

    # Returns the top keywords from the input string: the words of #sort
    # that are no stop words, limited to +limit+ entries (default 20).
    # A limit of zero or less yields an empty list.
    def keywords(limit = 20)
      @keywords ||= {}
      @keywords[limit] ||= begin
        stopwords = Vidibus::Words.stopwords(*locales)
        result = []
        sort.each do |word|
          # Stop scanning as soon as enough keywords have been collected.
          break if result.length >= limit
          # Stop words are compared in their permalink form without dashes.
          next if stopwords.include?(word.permalink.gsub("-", ""))
          result << word
        end
        result
      end
    end

    class << self

      # Returns a list of all stop words for given locale(s).
      # If no locales are given, all available will be used.
      # Locales without a stop word list contribute nothing.
      def stopwords(*locales)
        locales = I18n.available_locales if locales.empty?
        collected = []
        locales.each do |locale|
          translation = I18n.t("vidibus.stopwords", :locale => locale)
          # A missing translation comes back as a message String, not a list.
          next unless translation.is_a?(Array)
          collected << translation
        end
        collected.flatten.uniq
      end

      # Returns a list of words from given string.
      #
      # Characters outside of the allowed set are removed where they touch
      # a word boundary (start, end or a space); inside of a word they are
      # kept, so "sign-on", "10.5" or "It's" stay intact.
      def words(string)
        allowed = [" ", "a-z", "A-Z", "0-9"] + String::LATIN_MAP.values
        disallowed = ["¿", "¡"] # Add some disallowed chars that cannot be caught. TODO: Improve!
        match = /[^#{allowed.join("")}]/
        string.
          gsub(/\s+/mu, " ").
          gsub(/[#{disallowed.join}]/u, "").
          gsub(/#{match}+ /u, " ").
          gsub(/ #{match}+/u, " ").
          gsub(/#{match}+$/u, "").
          gsub(/^#{match}+/u, "").
          # Splitting on " " (instead of / /) collapses runs of spaces, so
          # stand-alone punctuation or leading whitespace leaves no empty words.
          split(" ")
      end

      # Returns a list of unique words ordered by their occurrence.
      # All words will be converted to downcase; the given list and its
      # strings are left untouched.
      #
      # Each occurrence is worth 1000 points. The position of the first
      # occurrence adds a bonus (at most 999, decreasing by one per word),
      # so earlier words win among equally frequent ones. Remaining ties
      # keep the order of first appearance.
      def sort_by_occurrence(list)
        scores = {}
        bonus = [999, list.length].min
        list.each do |word|
          key = word.downcase
          scores[key] ||= bonus
          scores[key] += 1000
          bonus -= 1 if bonus > 0
        end
        ranked = scores.to_a.each_with_index.sort_by { |pair, index| [-pair.last, index] }
        ranked.map { |pair, _index| pair.first }
      end
    end
  end
end
@@ -0,0 +1,14 @@
# Entry point of the vidibus-words gem.
require "rails"
require "vidibus-core_extensions"

# Load the implementation through its full path instead of pushing
# lib/vidibus onto the global load path and requiring the generic name
# "words", which could shadow or collide with files of other libraries.
require File.expand_path("../vidibus/words", __FILE__)

# Start a Rails Engine to load translations containing stopwords.
if defined?(Rails)
  module Vidibus
    module WordsEngine
      class Engine < ::Rails::Engine; end
    end
  end
end
@@ -0,0 +1,15 @@
# Shared setup for all specs: load paths, libraries, mocking framework
# and the stop word translations.
$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "..", "lib"))

require "rubygems"
require "rspec"
require "rr"
require "active_support/core_ext"
require "vidibus-core_extensions"
require "vidibus-words"

RSpec.configure do |config|
  config.mock_with :rr
end

# Resolve the locale files relative to this file, not to the current
# working directory, so the stop words are found no matter where the
# specs are started from.
I18n.load_path += Dir[File.join(File.dirname(__FILE__), "..", "config", "locales", "**", "*.{rb,yml}")]
@@ -0,0 +1,170 @@
# encoding: utf-8
require "spec_helper"

describe "Vidibus::Words" do
  describe "initialization" do
    it "should require an input string" do
      expect {Vidibus::Words.new}.to raise_error(ArgumentError)
    end

    it "should accept an additional argument to set locales" do
      words = Vidibus::Words.new("hello", :en)
      words.locales.should eql([:en])
    end

    it "should accept an additional list of locales" do
      words = Vidibus::Words.new("hello", [:en, :de])
      words.locales.should eql([:en, :de])
    end
  end

  describe "to_a" do
    it "should call Vidibus::Words.words with input string" do
      # mock (not stub on a return value) so the delegation is actually verified.
      mock(Vidibus::Words).words("Whazzup?") { [] }
      Vidibus::Words.new("Whazzup?").to_a
    end
  end

  describe "sort" do
    it "should call Vidibus::Words.sort_by_occurrence with list" do
      words = Vidibus::Words.new("Whazzup?")
      list = words.list
      mock(Vidibus::Words).sort_by_occurrence(list) { [] }
      words.sort
    end
  end

  describe "keywords" do
    let(:input) do
      "El profesor de ajedrez puso fin a la discusión de sus alumnos:
-Hoy, lo más importante, es la concentración. Deberán abstraerse del entorno y sólo prestar atención al juego. ¡No se olviden que la semana que viene es la maratón de ajedrez en Buenos Aires y ustedes representarán al club!
-¡Pero, maestro…! No se puede jugar en el bar que está lleno de gente y menos con el loquito de Aníbal al lado.
-¡Más respeto, jovencito! Aníbal es el hijo del presidente y fanático del ajedrez. Además, hace tiempo que quiere presenciar una partida, de modo que le prometí a su padre que estaría presente en el entrenamiento. ¡Y a ustedes les vendrá muy bien para ensayar la concentración! Los espero esta tarde a las cinco –dijo dando media vuelta y por finalizados los cuestionamientos.
El bar del club El Alfil, a las cinco de la tarde, estaba muy concurrido por los socios que se reponían de las distintas actividades del día. La mesa dispuesta para el partido estaba instalada en una esquina del salón, un poco aislada de las otras, ocupadas por parroquianos que poca atención le prestaban a los novatos ajedrecistas. En una silla contigua, se sentaba un joven de expresión entusiasta que contrastaba con el semblante adusto de los jugadores. El profesor colocó el reloj sobre la mesa y dio por iniciada la partida. Diego, que jugaba con las blancas, abrió con la apertura Ruy López y detuvo su cronómetro. Enseguida se escuchó el grito de Aníbal:
-¡Gambito de dama! ¡Gambito de dama!
Marcelo le echó una mirada de reojo y respondió con la defensa berlinesa para las negras.
-¡Defensa siciliana! ¡Defensa Siciliana! –chilló Aníbal en el colmo de su exaltación.
Acodado en la barra, don Antonio observaba la escena y le traducía al Sordo su interpretación de los hechos:
-Parece que los pibes están jugando a las damas y el loquito de Aníbal les da instrucciones. Por la cara que tienen no les gustan mucho los consejos, pero siendo un juego de damas parece acertado eso de las gambas. Y las sicilianas… ¡Se las traen!
El Sordo, que además de escuchar poco veía menos, asintió con un movimiento de cabeza.
En la mesa, los ajedrecistas se esforzaban por no perderse entre los desvaríos de Aníbal y movían sus piezas y detenían sus cronómetros y los volvían a poner en marcha.
El profesor afirmaba con la testa convencido de que la ordalía les aseguraría el primer puesto en el torneo.
-¡Defensa india de dama! ¡Defensa india de dama! –vociferó el hijo del presidente desde la silla, obedeciendo la orden de su papá de no moverse.
-¡Ahora pide que venga una india a defender a la dama! Al fin de cuentas parece que no está tan loquito este Aníbal –le dijo don Antonio en la oreja al Sordo y después se empinó un trago de grapa.
Los jugadores, estimulados por el acoso del loco, no tardaban más de quince minutos en mover sus piezas. De un solo movimiento, Diego le comió dos peones a Marcelo.
-¡Peones muertos! ¡Peones muertos! –sollozó Aníbal que era muy sensible.
-¡Las mujeres son vengativas, Sordo! ¿Qué necesidad de matar a esos pobres laburantes si con despedirlos hubiera protegido a la dama? –Le tironea de la manga de la camisa mientras le dice en voz alta:- ¡Mirá, mirá! ¿No te dije? ¡Es un juego violento!
En el salón, los aspirantes a campeones perseguían al loquito entre las mesas bombardeándolo con peones, alfiles, caballos, torres, reyes y reinas. El profesor los seguía levantando las piezas en el camino, mientras Aníbal desgranaba a la carrera sus nociones de ajedrez:
-¡Apertura, medio juego, final! ¡Final, Finaaaal!
-¿Querés que te diga? Me quedo con el truco, que no mata a nadie –concluyó don Antonio."
    end

    let(:words) {Vidibus::Words.new(input)}

    it "should return a list of words without stopwords, ordered by occurrence" do
      words = Vidibus::Words.new("To tell a long story short, it's necessary to tell it briefly without fluff!")
      words.keywords.should eql(%w[tell long story short necessary briefly fluff])
    end

    it "should only remove stopwords of given locale" do
      words = Vidibus::Words.new("To tell a long story short, it's necessary to tell it briefly without fluff!")
      words.locale = :de
      words.keywords.should eql(%w[to tell a long story short it's necessary it briefly without fluff])
    end

    it "should return only 20 keywords by default" do
      keywords = words.keywords
      keywords.length.should eql(20)
      keywords.should eql(%w[no aníbal dama defensa profesor ajedrez juego loquito sordo peones mesa don antonio parece piezas india concentración atención ustedes club])
    end

    it "should accept an optional length param" do
      words.keywords(30).length.should eql(30)
    end
  end

  describe ".stopwords" do
    it "should return a list of stop words of all languages available" do
      list = Vidibus::Words.stopwords
      list.should include("also") # de
      list.should include("able") # en
    end

    it "should return a list of stop words for given locale only" do
      list = Vidibus::Words.stopwords(:de)
      list.should include("also")
      list.should_not include("able")
    end

    it "should accept multiple locales" do
      list = Vidibus::Words.stopwords(:de, :en)
      list.should include("also") # de
      list.should include("able") # en
    end

    it "should return an empty array if no stop words are available for given locale" do
      Vidibus::Words.stopwords(:fr).should be_empty
    end
  end

  describe ".words" do
    it "should return an array of words from given string" do
      Vidibus::Words.words("Hello").should eql(%w[Hello])
    end

    it "should preserve dates with slashes" do
      Vidibus::Words.words("On 01/12/2011 we will party!").should eql(%w[On 01/12/2011 we will party])
    end

    it "should preserve dates with dashes" do
      Vidibus::Words.words("On 12-01-2011 we will party!").should eql(%w[On 12-01-2011 we will party])
    end

    it "should preserve dates with dots" do
      Vidibus::Words.words("On 12.01.2011 we will party!").should eql(%w[On 12.01.2011 we will party])
    end

    it "should preserve combined words" do
      Vidibus::Words.words("sign-on").should eql(%w[sign-on])
    end

    it "should preserve decimals with dots" do
      Vidibus::Words.words("10.5").should eql(%w[10.5])
    end

    it "should preserve decimals with commas" do
      Vidibus::Words.words("10,5").should eql(%w[10,5])
    end

    it "should preserve apostrophes" do
      Vidibus::Words.words("It's on!").should eql(%w[It's on])
    end

    it "should preserve special chars" do
      Vidibus::Words.words("Hola señor").should eql(%w[Hola señor])
    end

    it "should remove non-word chars" do
      Vidibus::Words.words("¿cómo está?").should eql(%w[cómo está])
    end

    it "should remove non-word chars within sentences" do
      Vidibus::Words.words("Hola señor, ¿cómo está?").should eql(%w[Hola señor cómo está])
    end

    it "should remove double non-word chars" do
      Vidibus::Words.words("-¡Defensa india de dama!-").should eql(%w[Defensa india de dama])
    end
  end

  describe ".sort_by_occurrence" do
    it "should sort a list of words by occurrence" do
      words = Vidibus::Words.words("Children's song: Hey, hey Wickie, hey Wickie, hey!")
      Vidibus::Words.sort_by_occurrence(words).should eql(%w[hey wickie children's song])
    end

    it "should also weigh the position of words" do
      words = Vidibus::Words.words("third: first second third")
      Vidibus::Words.sort_by_occurrence(words).should eql(%w[third first second])
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{vidibus-words}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Andre Pankratz"]
12
+ s.date = %q{2010-11-16}
13
+ s.description = %q{Contains stop words lists and methods to extract keywords from strings.}
14
+ s.email = %q{andre@vidibus.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".bundle/config",
21
+ ".document",
22
+ ".gitignore",
23
+ ".rspec",
24
+ "Gemfile",
25
+ "Gemfile.lock",
26
+ "LICENSE",
27
+ "README.rdoc",
28
+ "Rakefile",
29
+ "VERSION",
30
+ "config/locales/de.yml",
31
+ "config/locales/en.yml",
32
+ "config/locales/es.yml",
33
+ "lib/vidibus-words.rb",
34
+ "lib/vidibus/words.rb",
35
+ "spec/spec_helper.rb",
36
+ "spec/vidibus/words_spec.rb",
37
+ "vidibus-words.gemspec"
38
+ ]
39
+ s.homepage = %q{http://github.com/vidibus/vidibus-words}
40
+ s.rdoc_options = ["--charset=UTF-8"]
41
+ s.require_paths = ["lib"]
42
+ s.rubygems_version = %q{1.3.7}
43
+ s.summary = %q{Tools for handling of words.}
44
+ s.test_files = [
45
+ "spec/spec_helper.rb",
46
+ "spec/vidibus/words_spec.rb"
47
+ ]
48
+
49
+ if s.respond_to? :specification_version then
50
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
51
+ s.specification_version = 3
52
+
53
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
54
+ s.add_runtime_dependency(%q<rails>, ["~> 3.0.0"])
55
+ s.add_runtime_dependency(%q<vidibus-core_extensions>, [">= 0"])
56
+ else
57
+ s.add_dependency(%q<rails>, ["~> 3.0.0"])
58
+ s.add_dependency(%q<vidibus-core_extensions>, [">= 0"])
59
+ end
60
+ else
61
+ s.add_dependency(%q<rails>, ["~> 3.0.0"])
62
+ s.add_dependency(%q<vidibus-core_extensions>, [">= 0"])
63
+ end
64
+ end
65
+
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: vidibus-words
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Andre Pankratz
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-16 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rails
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 3
32
+ - 0
33
+ - 0
34
+ version: 3.0.0
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: vidibus-core_extensions
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 3
46
+ segments:
47
+ - 0
48
+ version: "0"
49
+ type: :runtime
50
+ version_requirements: *id002
51
+ description: Contains stop words lists and methods to extract keywords from strings.
52
+ email: andre@vidibus.com
53
+ executables: []
54
+
55
+ extensions: []
56
+
57
+ extra_rdoc_files:
58
+ - LICENSE
59
+ - README.rdoc
60
+ files:
61
+ - .bundle/config
62
+ - .document
63
+ - .gitignore
64
+ - .rspec
65
+ - Gemfile
66
+ - Gemfile.lock
67
+ - LICENSE
68
+ - README.rdoc
69
+ - Rakefile
70
+ - VERSION
71
+ - config/locales/de.yml
72
+ - config/locales/en.yml
73
+ - config/locales/es.yml
74
+ - lib/vidibus-words.rb
75
+ - lib/vidibus/words.rb
76
+ - spec/spec_helper.rb
77
+ - spec/vidibus/words_spec.rb
78
+ - vidibus-words.gemspec
79
+ has_rdoc: true
80
+ homepage: http://github.com/vidibus/vidibus-words
81
+ licenses: []
82
+
83
+ post_install_message:
84
+ rdoc_options:
85
+ - --charset=UTF-8
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ hash: 3
103
+ segments:
104
+ - 0
105
+ version: "0"
106
+ requirements: []
107
+
108
+ rubyforge_project:
109
+ rubygems_version: 1.3.7
110
+ signing_key:
111
+ specification_version: 3
112
+ summary: Tools for handling of words.
113
+ test_files:
114
+ - spec/spec_helper.rb
115
+ - spec/vidibus/words_spec.rb