scylla 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. data/Gemfile +4 -2
  2. data/Gemfile.lock +16 -1
  3. data/lib/scylla/classifier.rb +1 -1
  4. data/lib/scylla/generator.rb +16 -4
  5. data/lib/scylla/lms/afrikaans.lm +232 -232
  6. data/lib/scylla/lms/arabic.lm +175 -175
  7. data/lib/scylla/lms/bulgarian.lm +225 -225
  8. data/lib/scylla/lms/catalan.lm +309 -309
  9. data/lib/scylla/lms/danish.lm +167 -167
  10. data/lib/scylla/lms/english.lm +398 -398
  11. data/lib/scylla/lms/finnish.lm +237 -237
  12. data/lib/scylla/lms/french.lm +148 -148
  13. data/lib/scylla/lms/german.lm +258 -258
  14. data/lib/scylla/lms/greek.lm +236 -236
  15. data/lib/scylla/lms/hebrew.lm +154 -154
  16. data/lib/scylla/lms/hindi.lm +139 -139
  17. data/lib/scylla/lms/icelandic.lm +239 -239
  18. data/lib/scylla/lms/indonesian.lm +244 -244
  19. data/lib/scylla/lms/italian.lm +248 -248
  20. data/lib/scylla/lms/japanese.lm +90 -90
  21. data/lib/scylla/lms/korean.lm +306 -306
  22. data/lib/scylla/lms/norwegian.lm +193 -193
  23. data/lib/scylla/lms/polish.lm +241 -241
  24. data/lib/scylla/lms/portuguese.lm +232 -232
  25. data/lib/scylla/lms/romanian.lm +246 -246
  26. data/lib/scylla/lms/slovak.lm +242 -242
  27. data/lib/scylla/lms/slovenian.lm +229 -229
  28. data/lib/scylla/lms/spanish.lm +164 -164
  29. data/lib/scylla/lms/swedish.lm +157 -157
  30. data/lib/scylla/lms/tagalog.lm +247 -247
  31. data/lib/scylla/lms/thai.lm +252 -252
  32. data/lib/scylla/lms/turkish.lm +285 -285
  33. data/lib/scylla/lms/vietnamese.lm +250 -250
  34. data/lib/scylla/lms/welsh.lm +248 -248
  35. data/lib/scylla/resources.rb +1 -9
  36. data/lib/scylla.rb +4 -0
  37. data/scylla.gemspec +2 -120
  38. data/source_texts/english.txt +62 -27
  39. data/test/classifier_test.rb +1 -3
  40. data/test/fixtures/lms/danish.lm +173 -173
  41. data/test/fixtures/lms/english.lm +220 -220
  42. data/test/fixtures/lms/french.lm +175 -175
  43. data/test/fixtures/lms/german.lm +254 -254
  44. data/test/fixtures/lms/hindi.lm +139 -139
  45. data/test/fixtures/lms/italian.lm +236 -236
  46. data/test/fixtures/lms/japanese.lm +88 -88
  47. data/test/fixtures/lms/norwegian.lm +182 -182
  48. data/test/fixtures/lms/spanish.lm +164 -164
  49. data/test/fixtures/test_languages/spanish +0 -1
  50. data/test/generator_test.rb +13 -0
  51. data/test/helper.rb +2 -0
  52. metadata +18 -25
  53. data/.document +0 -5
  54. data/lib/scylla/lms/13375P33K.lm +0 -400
  55. data/scylla-0.1.0.gem +0 -0
  56. data/source_texts/13375P33K.txt +0 -199
  57. data/test/fixtures/lms/13375p33k.lm +0 -400
  58. data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/scylla.gemspec CHANGED
@@ -5,7 +5,7 @@
  Gem::Specification.new do |s|
  s.name = %q{scylla}
- s.version = "0.5.0"
+ s.version = "0.6.0"
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Ashwin Hegde"]
@@ -18,125 +18,7 @@ Gem::Specification.new do |s|
- s.files = [
- ".document",
- "Gemfile",
- "Gemfile.lock",
- "LICENSE.txt",
- "README.rdoc",
- "Rakefile",
- "bin/scylla",
- "lib/scylla.rb",
- "lib/scylla/classifier.rb",
- "lib/scylla/generator.rb",
- "lib/scylla/lms/13375P33K.lm",
- "lib/scylla/lms/afrikaans.lm",
- "lib/scylla/lms/arabic.lm",
- "lib/scylla/lms/bulgarian.lm",
- "lib/scylla/lms/catalan.lm",
- "lib/scylla/lms/chinese.lm",
- "lib/scylla/lms/danish.lm",
- "lib/scylla/lms/english.lm",
- "lib/scylla/lms/finnish.lm",
- "lib/scylla/lms/french.lm",
- "lib/scylla/lms/german.lm",
- "lib/scylla/lms/greek.lm",
- "lib/scylla/lms/hebrew.lm",
- "lib/scylla/lms/hindi.lm",
- "lib/scylla/lms/icelandic.lm",
- "lib/scylla/lms/indonesian.lm",
- "lib/scylla/lms/italian.lm",
- "lib/scylla/lms/japanese.lm",
- "lib/scylla/lms/korean.lm",
- "lib/scylla/lms/norwegian.lm",
- "lib/scylla/lms/polish.lm",
- "lib/scylla/lms/portuguese.lm",
- "lib/scylla/lms/romanian.lm",
- "lib/scylla/lms/russian.lm",
- "lib/scylla/lms/slovak.lm",
- "lib/scylla/lms/slovenian.lm",
- "lib/scylla/lms/spanish.lm",
- "lib/scylla/lms/swedish.lm",
- "lib/scylla/lms/tagalog.lm",
- "lib/scylla/lms/thai.lm",
- "lib/scylla/lms/turkish.lm",
- "lib/scylla/lms/vietnamese.lm",
- "lib/scylla/lms/welsh.lm",
- "lib/scylla/loader.rb",
- "lib/scylla/resources.rb",
- "lib/scylla/string.rb",
- "lib/scylla/tasks.rb",
- "scylla-0.1.0.gem",
- "scylla.gemspec",
- "source_texts/13375P33K.txt",
- "source_texts/afrikaans.txt",
- "source_texts/arabic.txt",
- "source_texts/bulgarian.txt",
- "source_texts/catalan.txt",
- "source_texts/chinese.txt",
- "source_texts/danish.txt",
- "source_texts/english.txt",
- "source_texts/finnish.txt",
- "source_texts/french.txt",
- "source_texts/german.txt",
- "source_texts/greek.txt",
- "source_texts/hebrew.txt",
- "source_texts/hindi.txt",
- "source_texts/icelandic.txt",
- "source_texts/indonesian.txt",
- "source_texts/italian.txt",
- "source_texts/japanese.txt",
- "source_texts/korean.txt",
- "source_texts/norwegian.txt",
- "source_texts/polish.txt",
- "source_texts/portuguese.txt",
- "source_texts/romanian.txt",
- "source_texts/russian.txt",
- "source_texts/slovak.txt",
- "source_texts/slovenian.txt",
- "source_texts/spanish.txt",
- "source_texts/swedish.txt",
- "source_texts/tagalog.txt",
- "source_texts/thai.txt",
- "source_texts/turkish.txt",
- "source_texts/vietnamese.txt",
- "source_texts/welsh.txt",
- "test/classifier_test.rb",
- "test/fixtures/lms/13375p33k.lm",
- "test/fixtures/lms/danish.lm",
- "test/fixtures/lms/english.lm",
- "test/fixtures/lms/french.lm",
- "test/fixtures/lms/german.lm",
- "test/fixtures/lms/hindi.lm",
- "test/fixtures/lms/italian.lm",
- "test/fixtures/lms/japanese.lm",
- "test/fixtures/lms/norwegian.lm",
- "test/fixtures/lms/spanish.lm",
- "test/fixtures/source_texts/13375P33K.txt",
- "test/fixtures/source_texts/danish.txt",
- "test/fixtures/source_texts/english.txt",
- "test/fixtures/source_texts/french.txt",
- "test/fixtures/source_texts/german.txt",
- "test/fixtures/source_texts/hindi.txt",
- "test/fixtures/source_texts/italian.txt",
- "test/fixtures/source_texts/japanese.txt",
- "test/fixtures/source_texts/norwegian.txt",
- "test/fixtures/source_texts/spanish.txt",
- "test/fixtures/test_languages/english",
- "test/fixtures/test_languages/french",
- "test/fixtures/test_languages/german",
- "test/fixtures/test_languages/hindi",
- "test/fixtures/test_languages/italian",
- "test/fixtures/test_languages/japanese",
- "test/fixtures/test_languages/norwegian",
- "test/fixtures/test_languages/spanish",
- "test/generator_test.rb",
- "test/helper.rb",
- "test/language_test.rb",
- "test/loader_test.rb",
- "test/scylla_test.rb"
- ]
+ s.files = Dir.glob("**/**")
  s.homepage = %q{}
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
@@ -1,35 +1,70 @@
- Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
+ English is a West Germanic language that arose in the Anglo-Saxon kingdoms of England and spread into what was to become south-east Scotland under the influence of the Anglian medieval kingdom of Northumbria. Following the economic, political, military, scientific, cultural, and colonial influence of Great Britain and the United Kingdom from the 18th century via the British Empire and the Commonwealth, and later via influence of the United States since the mid-20th century, it has been widely dispersed around the world, become the leading language of international discourse, and has acquired use as lingua franca in many regions. It is widely learned as a second language and used as an official language of the European Union and many Commonwealth countries, as well as in many world organizations. It is the third most natively spoken language in the world, after Mandarin Chinese and Spanish. It is the most widely spoken language across the world.
+ Historically, English originated from the fusion of languages and dialects, now collectively termed Old English, which were brought to the eastern coast of Great Britain by Germanic (Anglo-Saxon) settlers by the 5th century – with the word English being derived from the name of the Angles, and ultimately from their ancestral region of Angeln (in what is now Schleswig-Holstein). A significant number of English words are constructed based on roots from Latin, because Latin in some form was the lingua franca of the Christian Church and of European intellectual life. The language was further influenced by the Old Norse language due to Viking invasions in the 8th and 9th centuries.
+ The Norman conquest of England in the 11th century gave rise to heavy borrowings from Norman-French, and vocabulary and spelling conventions began to give the superficial appearance of a close relationship with Romance languages to what had now become Middle English. The Great Vowel Shift that began in the south of England in the 15th century is one of the historical events that mark the emergence of Modern English from Middle English.
+ Owing to the significant assimilation of various European languages throughout history, modern English contains a very large vocabulary. The Oxford English Dictionary lists over 250,000 distinct words, not including many technical or slang terms, or words that belong to multiple word classes.
- It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).
+ Significance
- Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from BC, making it over years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections .. and .. of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section ...
+ See also: English-speaking world and Anglosphere
+ Modern English, sometimes described as the first global lingua franca, is the dominant language or in some instances even the required international language of communications, science, information technology, business, seafaring, aviation, entertainment, radio and diplomacy. Its spread beyond the British Isles began with the growth of the British Empire, and by the late 19th century its reach was truly global. Following British colonisation from the 16th to 19th centuries, it became the dominant language in the United States, Canada, Australia and New Zealand. The growing economic and cultural influence of the US and its status as a global superpower since World War II have significantly accelerated the language's spread across the planet. English replaced German as the dominant language of science Nobel Prize laureates during the second half of the 20th century (compare the Evolution of Nobel Prizes by country). English equalled and may have surpassed French as the dominant language of diplomacy during the last half of the 19th century.
+ A working knowledge of English has become a requirement in a number of fields, occupations and professions such as medicine and computing; as a consequence over a billion people speak English to at least a basic level (see English language learning and teaching). It is one of six official languages of the United Nations.
+ One impact of the growth of English is the reduction of native linguistic diversity in many parts of the world. Its influence continues to play an important role in language attrition. Conversely, the natural internal variety of English along with creoles and pidgins have the potential to produce new distinct languages from English over time.
+ History
- The standard chunk of Lorem Ipsum used since the s is reproduced below for those interested. Sections .. and .. from "de Finibus Bonorum et Malorum" by Cicero are also reproduced in their exact original form, accompanied by English versions from the translation by H. Rackham.
+ Main article: History of the English language
+ English is a West Germanic language that originated from the Anglo-Frisian and Old Saxon dialects brought to Britain by Germanic settlers from various parts of what is now northwest Germany, Denmark and the Netherlands. Up to that point, in Roman Britain the native population is assumed to have spoken the Celtic language Brythonic alongside the acrolectal influence of Latin, from the 400-year Roman occupation.
+ One of these incoming Germanic tribes was the Angles, whom Bede believed to have relocated entirely to Britain. The names 'England' (from Engla land "Land of the Angles") and English (Old English Englisc) are derived from the name of this tribe—but Saxons, Jutes and a range of Germanic peoples from the coasts of Frisia, Lower Saxony, Jutland and Southern Sweden also moved to Britain in this era.
+ Initially, Old English was a diverse group of dialects, reflecting the varied origins of the Anglo-Saxon kingdoms of Great Britain but one of these dialects, Late West Saxon, eventually came to dominate, and it is in this that the poem Beowulf is written.
+ Old English was later transformed by two waves of invasion. The first was by speakers of the North Germanic language branch when Halfdan Ragnarsson and Ivar the Boneless started the conquering and colonisation of northern parts of the British Isles in the 8th and 9th centuries (see Danelaw). The second was by speakers of the Romance language Old Norman in the 11th century with the Norman conquest of England. Norman developed into Anglo-Norman, and then Anglo-French – and introduced a layer of words especially via the courts and government. As well as extending the lexicon with Scandinavian and Norman words these two events also simplified the grammar and transformed English into a borrowing language—more than normally open to accept new words from other languages.
+ The linguistic shifts in English following the Norman invasion produced what is now referred to as Middle English, with Geoffrey Chaucer's The Canterbury Tales being the best known work.
+ Throughout all this period Latin in some form was the lingua franca of European intellectual life, first the Medieval Latin of the Christian Church, but later the humanist Renaissance Latin, and those that wrote or copied texts in Latin commonly coined new terms from Latin to refer to things or concepts for which there was no existing native English word.
+ Modern English, which includes the works of William Shakespeare and the King James Bible, is generally dated from about 1550, and when the United Kingdom became a colonial power, English served as the lingua franca of the colonies of the British Empire. In the post-colonial period, some of the newly created nations which had multiple indigenous languages opted to continue using English as the lingua franca to avoid the political difficulties inherent in promoting any one indigenous language above the others. As a result of the growth of the British Empire, English was adopted in North America, India, Africa, Australia and many other regions, a trend extended with the emergence of the United States as a superpower in the mid-20th century.
+ Classification and related languages
- There are many variations of passages of Lorem Ipsum available, but the majority have suffered alteration in some form, by injected humour, or randomised words which don't look even slightly believable. If you are going to use a passage of Lorem Ipsum, you need to be sure there isn't anything embarrassing hidden in the middle of text. All the Lorem Ipsum generators on the Internet tend to repeat predefined chunks as necessary, making this the first true generator on the Internet. It uses a dictionary of over Latin words, combined with a handful of model sentence structures, to generate Lorem Ipsum which looks reasonable. The generated Lorem Ipsum is therefore always free from repetition, injected humour, or non-characteristic words etc.
+ The English language belongs to the Anglo-Frisian sub-group of the West Germanic branch of the Germanic family, a member of the Indo-European languages. Modern English is the direct descendant of Middle English, itself a direct descendant of Old English, a descendant of Proto-Germanic. Typical of most Germanic languages, English is characterised by the use of modal verbs, the division of verbs into strong and weak classes, and common sound shifts from Proto-Indo-European known as Grimm's Law. The closest living relatives of English are the Scots language (spoken primarily in Scotland and parts of Ireland) and Frisian (spoken on the southern fringes of the North Sea in Denmark, the Netherlands, and Germany).
+ After Scots and Frisian come those Germanic languages that are more distantly related: the non-Anglo-Frisian West Germanic languages (Dutch, Afrikaans, Low German, High German), and the North Germanic languages (Swedish, Danish, Norwegian, Icelandic, and Faroese). With the (partial) exception of Scots, none of the other languages is mutually intelligible with English, owing in part to the divergences in lexis, syntax, semantics, and phonology, and to the isolation afforded to the English language by the British Isles, although some, such as Dutch, do show strong affinities with English, especially to earlier stages of the language. Isolation has allowed English and Scots (as well as Icelandic and Faroese) to develop independently of the Continental Germanic languages and their influences over time.
+ In addition to isolation, lexical differences between English and other Germanic languages exist due to heavy borrowing in English of words from Latin and French. For example, compare "exit" (Latin), vs. Dutch uitgang, literally "out-going" (though outgang survives dialectally in restricted usage) and "change" (French) vs. German Änderung (literally "alteration, othering"); "movement" (French) vs. German Bewegung ("be-way-ing", i.e. "proceeding along the way"); etc. Preference of one synonym over another also causes differentiation in lexis, even where both words are Germanic, as in English care vs. German Sorge. Both words descend from Proto-Germanic *karō and *surgō respectively, but *karō has become the dominant word in English for "care" while in German, Dutch, and Scandinavian languages, the *surgō root prevailed. *Surgō still survives in English, however, as sorrow.
+ Despite lexical borrowing, English retains its classification as a Germanic language due to its structure and grammar. Non-native words are incorporated into a Germanic system of conjugation, declension, and syntax (For example, the word reduce is borrowed from Latin redūcere; however, in English we say "I reduce - I reduced - I will reduce" rather than "redūcō - redūxī - redūcam"; likewise, we say: "John's life insurance company" rather than "the company of insurance life of John", cf. the French: la compagnie d'assurance-vie de John). Furthermore, in English, all basic grammatical particles added to nouns, verbs, adjectives, and adverbs are Germanic. For nouns, these include the normal plural marker -s/-es, and the possessive markers -'s and -s' . For verbs, these include the third person present ending -s/-es (e.g. he stands/he reaches ), the present participle ending -ing, the simple past tense and past participle ending -ed, and the formation of the English infinitive using to (e.g. "to drive"; cf. Old English tō drīfenne). Adverbs generally receive an -ly ending, and adjectives and adverbs are inflected for the comparative and superlative using -er and -est (e.g. fast/faster/fastest), or through a combination with more and most. These particles append freely to all English words regardless of origin (tsunamis; communicates; to buccaneer; during; calmer; bizarrely) and all derive from Old English. Even the lack or absence of affixes, known as zero or null (-Ø) affixes, derive from endings which previously existed in Old English (usually -e, -a, -u, -o, -an, etc.), that later weakened to -e, and have since ceased to be pronounced and spelt (e.g. Modern English "I sing" = I sing-Ø < I singe < Old English ic singe; "we thought" = we thought-Ø < we thoughte(n) < Old English wē þōhton).
+ Although the syntax of English is somewhat different from that of other West Germanic languages with regards to the placement and order of verbs (for example, "I have never seen anything in the square" = German Ich habe nie etwas auf dem Platz gesehen, and the Dutch Ik heb nooit iets op het plein gezien, where the participle is placed at the end), English syntax continues to adhere closely to that of the North Germanic languages, which are believed to have influenced English syntax during the Middle English Period (e.g., Danish Jeg har aldrig set noget på torvet; Icelandic Ég hef aldrei séð neitt á torginu). As in most Germanic languages, English adjectives usually come before the noun they modify, even when the adjective is of Latinate origin (e.g. medical emergency, national treasure). Also, English continues to make extensive use of self-explaining compounds (e.g. streetcar, classroom), and nouns which serve as modifiers (e.g. lamp post, life insurance company), traits inherited from Old English (See also Kenning).
+ The kinship with other Germanic languages can also be seen in the tensing of English verbs (e.g. English fall/fell/fallen/will or shall fall, West Frisian fal/foel/fallen/sil falle, Dutch vallen/viel/gevallen/zullen vallen, German fallen/fiel/gefallen/werden fallen), the comparatives of adjectives and adverbs (e.g. English good/better/best, West Frisian goed/better/best, Dutch goed/beter/best, German gut/besser/best), the treatment of nouns (English shoemaker, shoemaker's, shoemakers, shoemakers'; Dutch schoenmaker, schoenmakers, schoenmakers, schoenmakeren; Swedish skomaker, skomakers, skomakere, skomakere), and the large number of cognates (e.g. English wet, Scots weet, West Frisian wiet, Swedish våt; English send, Dutch zenden, German senden; English meaning, Swedish mening, Icelandic meining, etc.). It also gives rise to false friends (e.g. English time vs Norwegian time, meaning "hour"; English gift vs German Gift, meaning "poison"), while differences in phonology can obscure words that really are related (tooth vs. German Zahn; compare also Danish tand). Sometimes both semantics and phonology are different (German Zeit ("time") is related to English "tide", but the English word, through a transitional phase of meaning "period"/"interval", has come primarily to mean gravitational effects on the ocean by the moon, though the original meaning is preserved in forms like tidings and betide, and phrases such as to tide over).[citation needed]
+ Many North Germanic words entered English due to the settlement of Viking raiders and Danish invasions which began around the 9th century (see Danelaw). Many of these words are common words, often mistaken for being native, which shows how close-knit the relations between the English and the Scandinavian settlers were (See below: Old Norse origins). Dutch and Low German also had a considerable influence on English vocabulary, contributing common everyday terms and many nautical and trading terms (See below: Dutch and Low German origins).
+ Finally, English has been forming compound words and affixing existing words separately from the other Germanic languages for over 1500 years and has different habits in that regard. For instance, abstract nouns in English may be formed from native words by the suffixes "‑hood", "-ship", "-dom" and "-ness". All of these have cognate suffixes in most or all other Germanic languages, but their usage patterns have diverged, as German "Freiheit" vs. English "freedom" (the suffix "-heit" being cognate of English "-hood", while English "-dom" is cognate with German "-tum"). The Germanic languages Icelandic and Faroese also follow English in this respect, since, like English, they developed independent of German influences.
+ Many French words are also intelligible to an English speaker, especially when they are seen in writing (as pronunciations are often quite different), because English absorbed a large vocabulary from Norman and French, via Anglo-Norman after the Norman Conquest, and directly from French in subsequent centuries. As a result, a large portion of English vocabulary is derived from French, with some minor spelling differences (e.g. inflectional endings, use of old French spellings, lack of diacritics, etc.), as well as occasional divergences in meaning of so-called false friends: for example, compare "library" with the French librairie, which means bookstore; in French, the word for "library" is bibliothèque. The pronunciation of most French loanwords in English (with the exception of a handful of more recently borrowed words such as mirage, genre, café; or phrases like coup d’état, rendez-vous, etc.) has become largely anglicised and follows a typically English phonology and pattern of stress (compare English "nature" vs. French nature, "button" vs. bouton, "table" vs. table, "hour" vs. heure, "reside" vs. résider, etc.).
+ Geographical distribution
- Translations: Can you help translate this site into a foreign language ? Please email us with details if you can help.
- There are now a set of mock banners available here in three colours and in a range of standard banner sizes:
- Donate: If you use this site regularly and would like to help keep the site on the Internet, please consider donating a small sum to help pay for the hosting and bandwidth bill. There is no minimum donation, any sum is appreciated - click here to donate using PayPal. Thank you for your support.
- WWW::Lipsum Chrome Firefox Add-on TeX Package Java Class Python Interface GTK Lipsum ActionScript Rails Groovy
- The standard Lorem Ipsum passage, used since the s
+ See also: List of countries by English-speaking population
- "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
- Section .. of "de Finibus Bonorum et Malorum", written by Cicero in BC
+ Pie chart showing the relative numbers of native English speakers in the major English-speaking countries of the world
+ Approximately 375 million people speak English as their first language. English today is probably the third largest language by number of native speakers, after Mandarin Chinese and Spanish. However, when combining native and non-native speakers it is probably the most commonly spoken language in the world, though possibly second to a combination of the Chinese languages (depending on whether or not distinctions in the latter are classified as "languages" or "dialects").
+ Estimates that include second language speakers vary greatly from 470 million to over a billion depending on how literacy or mastery is defined and measured. Linguistics professor David Crystal calculates that non-native speakers now outnumber native speakers by a ratio of 3 to 1.
+ The countries with the highest populations of native English speakers are, in descending order: United States (215 million), United Kingdom (61 million), Canada (18.2 million), Australia (15.5 million), Nigeria (4 million), Ireland (3.8 million), South Africa (3.7 million), and New Zealand (3.6 million, 2006 Census).
+ Countries such as the Philippines, Jamaica and Nigeria also have millions of native speakers of dialect continua ranging from an English-based creole to a more standard version of English. Of those nations where English is spoken as a second language, India has the most such speakers ('Indian English'). Crystal claims that, combining native and non-native speakers, India now has more people who speak or understand English than any other country in the world.
- "Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?"
- translation by H. Rackham
- "But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure?"
- Section .. of "de Finibus Bonorum et Malorum", written by Cicero in BC
- "At vero eos et accusamus et iusto odio dignissimos ducimus qui blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias excepturi sint occaecati cupiditate non provident, similique sunt in culpa qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio. Nam libero tempore, cum soluta nobis est eligendi optio cumque nihil impedit quo minus id quod maxime placeat facere possimus, omnis voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet ut et voluptates repudiandae sint et molestiae non recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat."
- translation by H. Rackham
- "On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains."
+ Countries where English is a major language
+ English is the primary language in Anguilla, Antigua and Barbuda, Australia, the Bahamas, Barbados, Belize, Bermuda, the British Indian Ocean Territory, the British Virgin Islands, Canada, the Cayman Islands, Dominica, the Falkland Islands, Gibraltar, Grenada, Guam, Guernsey, Guyana, Ireland, the Isle of Man, Jamaica, Jersey, Montserrat, Nauru, New Zealand, Pitcairn Islands, Saint Helena, Ascension and Tristan da Cunha, Saint Kitts and Nevis, Saint Vincent and the Grenadines, Singapore, South Georgia and the South Sandwich Islands, Trinidad and Tobago, the Turks and Caicos Islands, the United Kingdom and the United States.
+ In some countries where English is not the most spoken language, it is an official language; these countries include Botswana, Cameroon, the Federated States of Micronesia, Fiji, Gambia, Ghana, India, Kenya, Kiribati, Lesotho, Liberia, Madagascar, Malta, the Marshall Islands, Mauritius, Namibia, Nigeria, Pakistan, Palau, Papua New Guinea, the Philippines (Philippine English), Rwanda, Saint Lucia, Samoa, Seychelles, Sierra Leone, the Solomon Islands, Sri Lanka, Sudan, South Sudan, Swaziland, Tanzania, Uganda, Zambia, and Zimbabwe.
+ It is also one of the 11 official languages that are given equal status in South Africa (South African English). English is also the official language in current dependent territories of Australia (Norfolk Island, Christmas Island and Cocos Island) and of the United States (American Samoa, Guam, Northern Mariana Islands, Puerto Rico, and the US Virgin Islands), and the former British colony of Hong Kong. (See List of countries where English is an official language for more details.)
+ English is not an official language in the United States. Although the United States federal government has no official languages, English has been given official status by 30 of the 50 state governments. Although falling short of official status, English is also an important language in several former colonies and protectorates of the United Kingdom, such as Bahrain, Bangladesh, Brunei, Cyprus, Malaysia, and the United Arab Emirates.
+ English as a global language
+ See also: English in computing, International English, World language, and English as a foreign or second language
+ Because English is so widely spoken, it has often been referred to as a "world language", the lingua franca of the modern era, and while it is not an official language in most countries, it is currently the language most often taught as a foreign language. Some linguists believe that it is no longer the exclusive cultural property of "native English speakers", but is rather a language that is absorbing aspects of cultures worldwide as it continues to grow. It is, by international treaty, the official language for aerial and maritime communications. English is an official language of the United Nations and many other international organisations, including the International Olympic Committee.
+ English is the language most often studied as a foreign language in the European Union, by 89% of schoolchildren, ahead of French at 32%, while the perception of the usefulness of foreign languages amongst Europeans is 68% in favour of English ahead of 25% for French. Among some non-English speaking EU countries, a large percentage of the adult population can converse in English—in particular, 85% in Sweden, 83% in Denmark, 79% in the Netherlands, 66% in Luxembourg and over 50% in Finland, Slovenia, Austria, Belgium, and Germany.
+ Books, magazines, and newspapers written in English are available in many countries around the world, and English is the most commonly used language in the sciences with Science Citation Index reporting as early as 1997 that 95% of its articles were written in English, even though only half of them came from authors in English-speaking countries.
+ This increasing use of the English language globally has had a large impact on many other languages, leading to language shift and even language death, and to claims of linguistic imperialism. English itself is now open to language shift as multiple regional varieties feed back into the language as a whole.
+ Dialects and regional varieties
+ Main article: List of dialects of the English language
+ The expansion of the British Empire and—since World War II—the influence of the United States have spread English around the world. Because of that global spread, English has developed a host of English dialects and English-based creole languages and pidgins.
+ Several educated native dialects of English have wide acceptance as standards in much of the world. In the United Kingdom much emphasis is placed on Received Pronunciation, an educated dialect of South East England. General American, which is spread over most of the United States and much of Canada, is more typically the model for the American continents and areas (such as the Philippines) that have had either close association with the United States, or a desire to be so identified. In Oceania, the major native dialect of Australian English is spoken as a first language by the vast majority of the inhabitants of the Australian continent, with General Australian serving as the standard accent. The English of neighbouring New Zealand as well as that of South Africa have to a lesser degree been influential native varieties of the language.
+ Aside from these major dialects, there are numerous other varieties of English, which include, in most cases, several subvarieties, such as Cockney, Scouse and Geordie within British English; Newfoundland English within Canadian English; and African American Vernacular English ("Ebonics") and Southern American English within American English. English is a pluricentric language, without a central language authority like France's Académie française; and therefore no one variety is considered "correct" or "incorrect" except in terms of the expectations of the particular audience to which the language is directed.
+ Scots has its origins in early Northern Middle English and developed and changed during its history with influence from other sources, but following the Acts of Union 1707 a process of language attrition began, whereby successive generations adopted more and more features from Standard English, causing dialectalisation. Whether it is now a separate language or a dialect of English better described as Scottish English is in dispute, although the UK government now accepts Scots as a regional language and has recognised it as such under the European Charter for Regional or Minority Languages. There are a number of regional dialects of Scots, and pronunciation, grammar and lexis of the traditional forms differ, sometimes substantially, from other varieties of English.
+ English speakers have many different accents, which often signal the speaker's native dialect or language. For the most distinctive characteristics of regional accents, see Regional accents of English, and for a complete list of regional dialects, see List of dialects of the English language. Within England, variation is now largely confined to pronunciation rather than grammar or vocabulary. At the time of the Survey of English Dialects, grammar and vocabulary differed across the country, but a process of lexical attrition has led most of this variation to die out.
+ Just as English itself has borrowed words from many different languages over its history, English loanwords now appear in many languages around the world, indicative of the technological and cultural influence of its speakers. Several pidgins and creole languages have been formed on an English base, such as Jamaican Patois, Nigerian Pidgin, and Tok Pisin. There are many words in English coined to describe forms of particular non-English languages that contain a very high proportion of English words.
+ Constructed varieties of English
+ Basic English is simplified for easy international use. Manufacturers and other international businesses tend to write manuals and communicate in Basic English. Some English schools in Asia teach it as a practical subset of English for use by beginners.
+ E-Prime excludes forms of the verb to be.
+ English reform is an attempt to improve collectively upon the English language.
+ Manually Coded English constitutes a variety of systems that have been developed to represent the English language with hand signals, designed primarily for use in deaf education. These should not be confused with true sign languages such as British Sign Language and American Sign Language used in Anglophone countries, which are independent and not based on English.
+ Seaspeak and the related Airspeak and Policespeak, all based on restricted vocabularies, were designed by Edward Johnson in the 1980s to aid international cooperation and communication in specific areas. There is also a tunnelspeak for use in the Channel Tunnel.
+ Simplified Technical English was historically developed for aerospace industry maintenance manuals and is now used in various industries.
+ Special English is a simplified version of English used by the Voice of America. It uses a vocabulary of only 1500 words.
@@ -12,8 +12,7 @@ class ClassifierTest < Test::Unit::TestCase
  @datext = "Gennem anstrengelser når man stjernerne."
  @jptext = " ラ゜珥 ドゥ背騥ヴェ祟 ウァ諤椺と䤎 覥ヒュぱカキャ ゝド"
  @hitext = "ಚಿತ್ರಲಿಪಿಯಿಂದ ಹಿಡಿದು ಇಂದಿನ ಮುದ್ರಣ— ಕಂಪ್ಯೂಟರ್"
- @lstext = "wtf j00 t41k1n b0ut"
- @sc = sc =
+ @sc =
  should "correctly identify the languages based on the given text" do
@@ -25,7 +24,6 @@ class ClassifierTest < Test::Unit::TestCase
  assert_equal "danish", @sc.classify_string(@datext).first
  assert_equal "japanese", @sc.classify_string(@jptext).first
  assert_equal "hindi", @sc.classify_string(@hitext).first
- assert_equal "13375p33k", @sc.classify_string(@lstext).first
  should "correctly identify the language based on a given file" do