treat 0.2.5 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (242)
  1. data/LICENSE +3 -3
  2. data/README.md +33 -0
  3. data/files/INFO +1 -0
  4. data/lib/treat.rb +40 -105
  5. data/lib/treat/ai.rb +12 -0
  6. data/lib/treat/ai/classifiers/id3.rb +27 -0
  7. data/lib/treat/categories.rb +82 -35
  8. data/lib/treat/categorizable.rb +44 -0
  9. data/lib/treat/classification.rb +61 -0
  10. data/lib/treat/configurable.rb +115 -0
  11. data/lib/treat/data_set.rb +42 -0
  12. data/lib/treat/dependencies.rb +24 -0
  13. data/lib/treat/downloader.rb +87 -0
  14. data/lib/treat/entities.rb +68 -66
  15. data/lib/treat/entities/abilities.rb +10 -0
  16. data/lib/treat/entities/abilities/buildable.rb +327 -0
  17. data/lib/treat/entities/abilities/checkable.rb +31 -0
  18. data/lib/treat/entities/abilities/copyable.rb +45 -0
  19. data/lib/treat/entities/abilities/countable.rb +51 -0
  20. data/lib/treat/entities/abilities/debuggable.rb +83 -0
  21. data/lib/treat/entities/abilities/delegatable.rb +123 -0
  22. data/lib/treat/entities/abilities/doable.rb +62 -0
  23. data/lib/treat/entities/abilities/exportable.rb +11 -0
  24. data/lib/treat/entities/abilities/iterable.rb +115 -0
  25. data/lib/treat/entities/abilities/magical.rb +83 -0
  26. data/lib/treat/entities/abilities/registrable.rb +74 -0
  27. data/lib/treat/entities/abilities/stringable.rb +91 -0
  28. data/lib/treat/entities/entities.rb +104 -0
  29. data/lib/treat/entities/entity.rb +122 -245
  30. data/lib/treat/exception.rb +4 -4
  31. data/lib/treat/extractors.rb +77 -80
  32. data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
  33. data/lib/treat/extractors/language/what_language.rb +50 -45
  34. data/lib/treat/extractors/name_tag/stanford.rb +55 -0
  35. data/lib/treat/extractors/tf_idf/native.rb +87 -0
  36. data/lib/treat/extractors/time/chronic.rb +55 -0
  37. data/lib/treat/extractors/time/nickel.rb +86 -62
  38. data/lib/treat/extractors/time/ruby.rb +53 -0
  39. data/lib/treat/extractors/topic_words/lda.rb +67 -58
  40. data/lib/treat/extractors/topics/reuters.rb +100 -87
  41. data/lib/treat/formatters.rb +39 -35
  42. data/lib/treat/formatters/readers/abw.rb +49 -29
  43. data/lib/treat/formatters/readers/autoselect.rb +37 -33
  44. data/lib/treat/formatters/readers/doc.rb +19 -13
  45. data/lib/treat/formatters/readers/html.rb +52 -30
  46. data/lib/treat/formatters/readers/image.rb +41 -40
  47. data/lib/treat/formatters/readers/odt.rb +59 -45
  48. data/lib/treat/formatters/readers/pdf.rb +28 -25
  49. data/lib/treat/formatters/readers/txt.rb +12 -15
  50. data/lib/treat/formatters/readers/xml.rb +73 -36
  51. data/lib/treat/formatters/serializers/xml.rb +80 -79
  52. data/lib/treat/formatters/serializers/yaml.rb +19 -18
  53. data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
  54. data/lib/treat/formatters/unserializers/xml.rb +94 -99
  55. data/lib/treat/formatters/unserializers/yaml.rb +20 -19
  56. data/lib/treat/formatters/visualizers/dot.rb +132 -132
  57. data/lib/treat/formatters/visualizers/standoff.rb +52 -44
  58. data/lib/treat/formatters/visualizers/tree.rb +26 -29
  59. data/lib/treat/groupable.rb +153 -0
  60. data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
  61. data/lib/treat/inflectors.rb +50 -45
  62. data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
  63. data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
  64. data/lib/treat/inflectors/declensors/active_support.rb +31 -0
  65. data/lib/treat/inflectors/declensors/english.rb +38 -0
  66. data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
  67. data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
  68. data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
  69. data/lib/treat/inflectors/stemmers/porter.rb +160 -0
  70. data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
  71. data/lib/treat/inflectors/stemmers/uea.rb +28 -0
  72. data/lib/treat/installer.rb +308 -0
  73. data/lib/treat/kernel.rb +105 -27
  74. data/lib/treat/languages.rb +122 -88
  75. data/lib/treat/languages/arabic.rb +15 -15
  76. data/lib/treat/languages/chinese.rb +15 -15
  77. data/lib/treat/languages/dutch.rb +15 -15
  78. data/lib/treat/languages/english.rb +61 -62
  79. data/lib/treat/languages/french.rb +19 -19
  80. data/lib/treat/languages/german.rb +20 -20
  81. data/lib/treat/languages/greek.rb +15 -15
  82. data/lib/treat/languages/italian.rb +16 -16
  83. data/lib/treat/languages/polish.rb +15 -15
  84. data/lib/treat/languages/portuguese.rb +15 -15
  85. data/lib/treat/languages/russian.rb +15 -15
  86. data/lib/treat/languages/spanish.rb +16 -16
  87. data/lib/treat/languages/swedish.rb +16 -16
  88. data/lib/treat/lexicalizers.rb +34 -55
  89. data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
  90. data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
  91. data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
  92. data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
  93. data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
  94. data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
  95. data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
  96. data/lib/treat/linguistics.rb +9 -0
  97. data/lib/treat/linguistics/categories.rb +11 -0
  98. data/lib/treat/linguistics/tags.rb +422 -0
  99. data/lib/treat/loaders/linguistics.rb +30 -0
  100. data/lib/treat/loaders/stanford.rb +27 -0
  101. data/lib/treat/object.rb +1 -0
  102. data/lib/treat/processors.rb +37 -44
  103. data/lib/treat/processors/chunkers/autoselect.rb +16 -0
  104. data/lib/treat/processors/chunkers/html.rb +71 -0
  105. data/lib/treat/processors/chunkers/txt.rb +18 -24
  106. data/lib/treat/processors/parsers/enju.rb +253 -208
  107. data/lib/treat/processors/parsers/stanford.rb +130 -131
  108. data/lib/treat/processors/segmenters/punkt.rb +79 -45
  109. data/lib/treat/processors/segmenters/stanford.rb +46 -48
  110. data/lib/treat/processors/segmenters/tactful.rb +43 -36
  111. data/lib/treat/processors/tokenizers/perl.rb +124 -92
  112. data/lib/treat/processors/tokenizers/ptb.rb +81 -0
  113. data/lib/treat/processors/tokenizers/punkt.rb +48 -42
  114. data/lib/treat/processors/tokenizers/stanford.rb +39 -38
  115. data/lib/treat/processors/tokenizers/tactful.rb +64 -55
  116. data/lib/treat/proxies.rb +52 -35
  117. data/lib/treat/retrievers.rb +26 -16
  118. data/lib/treat/retrievers/indexers/ferret.rb +47 -26
  119. data/lib/treat/retrievers/searchers/ferret.rb +69 -50
  120. data/lib/treat/tree.rb +241 -183
  121. data/spec/collection.rb +123 -0
  122. data/spec/document.rb +93 -0
  123. data/spec/entity.rb +408 -0
  124. data/spec/languages.rb +25 -0
  125. data/spec/phrase.rb +146 -0
  126. data/spec/samples/mathematicians/archimedes.abw +34 -0
  127. data/spec/samples/mathematicians/euler.html +21 -0
  128. data/spec/samples/mathematicians/gauss.pdf +0 -0
  129. data/spec/samples/mathematicians/leibniz.txt +13 -0
  130. data/spec/samples/mathematicians/newton.doc +0 -0
  131. data/spec/sandbox.rb +5 -0
  132. data/spec/token.rb +109 -0
  133. data/spec/treat.rb +52 -0
  134. data/spec/tree.rb +117 -0
  135. data/spec/word.rb +110 -0
  136. data/spec/zone.rb +66 -0
  137. data/tmp/INFO +1 -1
  138. metadata +100 -201
  139. data/INSTALL +0 -1
  140. data/README +0 -3
  141. data/TODO +0 -28
  142. data/lib/economist/half_cocked_basel.txt +0 -16
  143. data/lib/economist/hungarys_troubles.txt +0 -46
  144. data/lib/economist/indias_slowdown.txt +0 -15
  145. data/lib/economist/merkozy_rides_again.txt +0 -24
  146. data/lib/economist/prada_is_not_walmart.txt +0 -9
  147. data/lib/economist/to_infinity_and_beyond.txt +0 -15
  148. data/lib/ferret/_11.cfs +0 -0
  149. data/lib/ferret/_14.cfs +0 -0
  150. data/lib/ferret/_p.cfs +0 -0
  151. data/lib/ferret/_s.cfs +0 -0
  152. data/lib/ferret/_v.cfs +0 -0
  153. data/lib/ferret/_y.cfs +0 -0
  154. data/lib/ferret/segments +0 -0
  155. data/lib/ferret/segments_15 +0 -0
  156. data/lib/treat/buildable.rb +0 -157
  157. data/lib/treat/category.rb +0 -33
  158. data/lib/treat/delegatable.rb +0 -116
  159. data/lib/treat/doable.rb +0 -45
  160. data/lib/treat/entities/collection.rb +0 -14
  161. data/lib/treat/entities/document.rb +0 -12
  162. data/lib/treat/entities/phrases.rb +0 -17
  163. data/lib/treat/entities/tokens.rb +0 -61
  164. data/lib/treat/entities/zones.rb +0 -41
  165. data/lib/treat/extractors/coreferences/stanford.rb +0 -69
  166. data/lib/treat/extractors/date/chronic.rb +0 -32
  167. data/lib/treat/extractors/date/ruby.rb +0 -25
  168. data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
  169. data/lib/treat/extractors/language/language_extractor.rb +0 -27
  170. data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
  171. data/lib/treat/extractors/roles/naive.rb +0 -73
  172. data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
  173. data/lib/treat/extractors/statistics/position_in.rb +0 -14
  174. data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
  175. data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
  176. data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
  177. data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
  178. data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
  179. data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
  180. data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
  181. data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
  182. data/lib/treat/feature.rb +0 -58
  183. data/lib/treat/features.rb +0 -7
  184. data/lib/treat/formatters/visualizers/short_value.rb +0 -29
  185. data/lib/treat/formatters/visualizers/txt.rb +0 -45
  186. data/lib/treat/group.rb +0 -106
  187. data/lib/treat/helpers/linguistics_loader.rb +0 -18
  188. data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
  189. data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
  190. data/lib/treat/inflectors/declensions/english.rb +0 -319
  191. data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
  192. data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
  193. data/lib/treat/inflectors/stem/porter.rb +0 -162
  194. data/lib/treat/inflectors/stem/porter_c.rb +0 -26
  195. data/lib/treat/inflectors/stem/uea.rb +0 -30
  196. data/lib/treat/install.rb +0 -59
  197. data/lib/treat/languages/tags.rb +0 -377
  198. data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
  199. data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
  200. data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
  201. data/lib/treat/lexicalizers/tag/brill.rb +0 -91
  202. data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
  203. data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
  204. data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
  205. data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
  206. data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
  207. data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
  208. data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
  209. data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
  210. data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
  211. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
  212. data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
  213. data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
  214. data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
  215. data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
  216. data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
  217. data/lib/treat/registrable.rb +0 -28
  218. data/lib/treat/sugar.rb +0 -50
  219. data/lib/treat/viewable.rb +0 -29
  220. data/lib/treat/visitable.rb +0 -28
  221. data/test/profile.rb +0 -2
  222. data/test/tc_entity.rb +0 -117
  223. data/test/tc_extractors.rb +0 -73
  224. data/test/tc_formatters.rb +0 -41
  225. data/test/tc_inflectors.rb +0 -34
  226. data/test/tc_lexicalizers.rb +0 -32
  227. data/test/tc_processors.rb +0 -50
  228. data/test/tc_resources.rb +0 -22
  229. data/test/tc_treat.rb +0 -60
  230. data/test/tc_tree.rb +0 -60
  231. data/test/tests.rb +0 -20
  232. data/test/texts.rb +0 -19
  233. data/test/texts/english/half_cocked_basel.txt +0 -16
  234. data/test/texts/english/hose_and_dry.doc +0 -0
  235. data/test/texts/english/hungarys_troubles.abw +0 -70
  236. data/test/texts/english/long.html +0 -24
  237. data/test/texts/english/long.txt +0 -22
  238. data/test/texts/english/medium.txt +0 -5
  239. data/test/texts/english/republican_nomination.pdf +0 -0
  240. data/test/texts/english/saving_the_euro.odt +0 -0
  241. data/test/texts/english/short.txt +0 -3
  242. data/test/texts/english/zero_sum.html +0 -111
data/spec/languages.rb ADDED
@@ -0,0 +1,25 @@
1
+ require_relative '../lib/treat'
2
+
3
+ describe Treat::Languages do
4
+
5
+ describe "#code(language, iso = 2)" do
6
+
7
+ it "returns the language code given a full-length " +
8
+ "lowercase identifier representing a language, in " +
9
+ "the specified ISO-639 format (1 or 2)" do
10
+ Treat::Languages.code(:english, 2).should eql :eng
11
+ Treat::Languages.code(:english, 1).should eql :en
12
+ end
13
+
14
+ end
15
+
16
+ describe "#describe(code)" do
17
+
18
+ it "returns a lowercase identifier representing the " +
19
+ "full name of a language, given its ISO-639-1/2 code." do
20
+ Treat::Languages.describe(:eng).should eql :english
21
+ end
22
+
23
+ end
24
+
25
+ end
data/spec/phrase.rb ADDED
@@ -0,0 +1,146 @@
1
+ require_relative '../lib/treat'
2
+
3
+ describe Treat::Entities::Phrase do
4
+
5
+ describe "Buildable" do
6
+
7
+ describe "#build" do
8
+
9
+ context "when supplied with a sentence" do
10
+
11
+ it "creates a sentence with the text" do
12
+ sentence = "This is a sentence."
13
+ s = Treat::Entities::Phrase.build(sentence)
14
+ s.type.should eql :sentence
15
+ s.to_s.should eql sentence
16
+ end
17
+
18
+ end
19
+
20
+ context "when supplied with a phrase" do
21
+
22
+ it "creates a phrase with the text" do
23
+ phrase = "this is a phrase"
24
+ p = Treat::Entities::Phrase.build(phrase)
25
+ p.type.should eql :phrase
26
+ p.to_s.should eql phrase
27
+ end
28
+
29
+ end
30
+
31
+ end
32
+
33
+ end
34
+
35
+ describe "Extractable" do
36
+
37
+ describe "#named_entity" do
38
+ it "tags the named entity words in the phrase" do
39
+ # Not implemented.
40
+ end
41
+ end
42
+
43
+ describe "#time" do
44
+ it "returns a DateTime object representing the time in the phrase" do
45
+ Treat::Languages::English::Extractors[:time].each do |e|
46
+ t = 'october 2006'.time(e)
47
+ t.month.should eql 10
48
+ end
49
+ end
50
+ end
51
+ end
52
+
53
+ describe "Processable" do
54
+
55
+ describe "#tokenize" do
56
+
57
+ it "splits a phrase/sentence into tokens and adds them as children of the phrase" do
58
+ Treat::Languages::English::Processors[:tokenizers].each do |t|
59
+ @phrase = Treat::Entities::Phrase.new('a phrase to tokenize')
60
+ @phrase.tokenize(t)
61
+ @phrase.children.should eql @phrase.tokens
62
+ @phrase.tokens.map { |t| t.to_s }.should
63
+ eql ['A', 'sentence', 'to', 'tokenize']
64
+ end
65
+ end
66
+
67
+ end
68
+
69
+ describe "#parse" do
70
+
71
+ it "parses a phrase/sentence into its syntax tree, " +
72
+ "adding nested phrases and tokens as children of the phrase/sentence" do
73
+ Treat::Languages::English::Processors[:parsers].each do |p|
74
+ next #f p == :enju # slow?
75
+ @sentence = Treat::Entities::
76
+ Sentence.new('A sentence to tokenize.')
77
+ @sentence.parse(p)
78
+ @sentence.phrases.map { |t| t.to_s }.should
79
+ eql ["A sentence to tokenize.",
80
+ "A sentence to tokenize.",
81
+ "A sentence", "to tokenize",
82
+ "tokenize"]
83
+ end
84
+ end
85
+
86
+ end
87
+
88
+ end
89
+
90
+ describe "Lexicalizable" do
91
+
92
+ before do
93
+ @taggers = Treat::Languages::English::Lexicalizers[:taggers]
94
+ end
95
+
96
+ describe "#tag" do
97
+
98
+ context "when called on an untokenized phrase" do
99
+ it "returns the tag 'P'" do
100
+ @taggers.each do |t|
101
+ p = 'a phrase'
102
+ p.tag(t)
103
+ p.tag(t).should eql 'P'
104
+ end
105
+ end
106
+ end
107
+
108
+ context "when called on an untokenized sentence" do
109
+ it "returns the tag 'S'" do
110
+ @taggers.each do |t|
111
+ s = 'This is a sentence.'
112
+ s.tag(t)
113
+ s.tag.should eql 'S'
114
+ end
115
+ end
116
+ end
117
+
118
+ context "when called a tokenized phrase" do
119
+ it "returns the tag 'P' and tags all the phrase's tokens" do
120
+ @taggers.each do |t|
121
+ p = 'a phrase'.to_entity
122
+ p.tokenize
123
+ p.tag(t).should eql 'P'
124
+ p.tokens.map { |t| t.tag }.should
125
+ eql ["DT", "NN"]
126
+ end
127
+ end
128
+ end
129
+
130
+ context "when called on a tokenized sentence" do
131
+ it "returns the tag 'S' and tags all the sentence's tokens" do
132
+ @taggers.each do |t|
133
+ s = 'This is a sentence.'.to_entity
134
+ s.tokenize
135
+ s.tag(t).should eql 'S'
136
+ s.tokens.map { |t| t.tag }.should
137
+ eql ["DT", "VBZ", "DT", "NN", "."]
138
+ end
139
+ end
140
+ end
141
+
142
+ end
143
+
144
+ end
145
+
146
+ end
data/spec/samples/mathematicians/archimedes.abw ADDED
@@ -0,0 +1,34 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE abiword PUBLIC "-//ABISOURCE//DTD AWML 1.0 Strict//EN" "http://www.abisource.com/awml.dtd">
3
+ <abiword template="false" styles="unlocked" xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:math="http://www.w3.org/1998/Math/MathML" xid-max="10" xmlns:dc="http://purl.org/dc/elements/1.1/" fileformat="1.0" xmlns:svg="http://www.w3.org/2000/svg" xmlns:awml="http://www.abisource.com/awml.dtd" xmlns="http://www.abisource.com/awml.dtd" xmlns:xlink="http://www.w3.org/1999/xlink" version="0.99.2" xml:space="preserve" props="dom-dir:ltr; document-footnote-restart-section:0; document-endnote-type:numeric; document-endnote-place-enddoc:1; document-endnote-initial:1; lang:en-US; document-endnote-restart-section:0; document-footnote-restart-page:0; document-footnote-type:numeric; document-footnote-initial:1; document-endnote-place-endsection:0">
4
+ <!-- ======================================================================== -->
5
+ <!-- This file is an AbiWord document. -->
6
+ <!-- AbiWord is a free, Open Source word processor. -->
7
+ <!-- More information about AbiWord is available at http://www.abisource.com/ -->
8
+ <!-- You should not edit this file by hand. -->
9
+ <!-- ======================================================================== -->
10
+
11
+ <metadata>
12
+ <m key="dc.format">application/x-abiword</m>
13
+ <m key="abiword.generator">AbiWord</m>
14
+ </metadata>
15
+ <history version="3" edit-time="53" last-saved="1331065613" uid="ed212e54-67c9-11e1-9351-a4456ecc32d5">
16
+ <version id="2" started="1331065303" uid="014dfec0-67ca-11e1-9351-a4456ecc32d5" auto="0" top-xid="9"/>
17
+ <version id="3" started="1331065613" uid="b65b3fb2-67ca-11e1-8445-dcf035d382c2" auto="0" top-xid="10"/>
18
+ </history>
19
+ <styles>
20
+ <s type="P" name="Normal" followedby="Current Settings" props="font-weight:normal; font-family:Times New Roman; margin-top:0pt; color:000000; margin-left:0pt; bgcolor:transparent; widows:2; font-style:normal; text-indent:0in; text-position:normal; margin-bottom:0pt; lang:en-US; line-height:1.0; text-align:left; font-variant:normal; text-decoration:none; margin-right:0pt; font-size:12pt; font-stretch:normal"/>
21
+ </styles>
22
+ <pagesize pagetype="Letter" orientation="portrait" width="8.500000" height="11.000000" units="in" page-scale="1.000000"/>
23
+ <section xid="8" props="page-margin-footer:0.5in; page-margin-header:0.5in">
24
+ <p style="Normal" xid="9" props="text-align:left; dom-dir:ltr">Archimedes of Syracuse (287-212 BC)</p>
25
+ <p style="Normal" xid="1" props="text-align:left; dom-dir:ltr"></p>
26
+ <p style="Normal" xid="2" props="text-align:left; dom-dir:ltr">Archimedes is universally acknowledged to be the greatest of ancient mathematicians. He studied at Euclid's school (probably after Euclid's death), but his work far surpassed the works of Euclid. His achievements are particularly impressive given the lack of good mathematical notation in his day. His proofs are noted not only for brilliance but for unequalled clarity, with a modern biographer (Heath) describing Archimedes' treatises as "without exception monuments of mathematical exposition ... so impressive in their perfection as to create a feeling akin to awe in the mind of the reader." Archimedes made advances in number theory, algebra, and analysis, but is most renowned for his many theorems of plane and solid geometry. He was first to prove Heron's formula for the area of a triangle. His excellent approximation to √3 indicates that he'd partially anticipated the method of continued fractions. He found a method to trisect an arbitrary angle (using a markable straightedge — the construction is impossible using strictly Platonic rules). Although it doesn't survive in his writings, Pappus reports that he discovered the Archimedean solids. One of his most remarkable and famous geometric results was determining the area of a parabolic section, for which he offered two independent proofs, one using his Principle of the Lever, the other using a geometric series.</p>
27
+ <p style="Normal" xid="10" props="text-align:left; dom-dir:ltr"></p>
28
+ <p style="Normal" xid="3" props="text-align:left; dom-dir:ltr">Archimedes anticipated integral calculus, most notably by determining the centers of mass of hemisphere and cylindrical wedge, and the volume of two cylinders' intersection. Although Archimedes made little use of differential calculus, Chasles credits him (along with Kepler, Cavalieri, and Fermat) as one of the four who developed calculus before Newton and Leibniz. He was similar to Newton in that he used his (non-rigorous) calculus to discover results, but then devised rigorous geometric proofs for publication. His original achievements in physics include the principles of leverage, the first law of hydrostatics, and inventions like the compound pulley, the hydraulic screw, and war machines. His books include Floating Bodies, Spirals, The Sand Reckoner, Measurement of the Circle, and Sphere and Cylinder. He developed the Stomachion puzzle (and solved a difficult enumeration problem involving it). Archimedes proved that the volume of a sphere is two-thirds the volume of a circumscribing cylinder. He requested that a representation of such a sphere and cylinder be inscribed on his tomb.</p>
29
+ <p style="Normal" xid="4" props="text-align:left; dom-dir:ltr"></p>
30
+ <p style="Normal" xid="5" props="text-align:left; dom-dir:ltr">Archimedes discovered formulae for the volume and surface area of a sphere, and may even have been first to notice and prove the simple relationship between a circle's circumference and area. For these reasons, π is often called Archimedes' constant. His approximation 223/71 &lt; π &lt; 22/7 was the best of his day, though Apollonius soon surpassed it. That Archimedes shared the attitude of later mathematicians like Hardy and Brouwer is suggested by Plutarch's comment that Archimedes regarded applied mathematics "as ignoble and sordid ... and did not deign to [write about his mechanical inventions; instead] he placed his whole ambition in those speculations the beauty and subtlety of which are untainted by any admixture of the common needs of life."</p>
31
+ <p style="Normal" xid="6" props="text-align:left; dom-dir:ltr"></p>
32
+ <p style="Normal" xid="7" props="text-align:left; dom-dir:ltr">In the 20th century, modern technology led to the discovery of new writings by Archimedes, hitherto hidden on a palimpsest, including a note that implies an understanding of the distinction between countable and uncountable infinities (a distinction which wasn't resolved until Georg Cantor, who lived 2300 years after the time of Archimedes). Although Newton may have been the most important mathematician, and Gauss the greatest theorem prover, it is widely accepted that Archimedes was the greatest genius who ever lived. Yet, Hart omits him altogether from his list of Most Influential Persons: Archimedes was simply too far ahead of his time to have great historical significance.</p>
33
+ </section>
34
+ </abiword>
data/spec/samples/mathematicians/euler.html ADDED
@@ -0,0 +1,21 @@
1
+ <html>
2
+ <head>
3
+ <title>Leonhard Euler (1707-1783)</title>
4
+ </head>
5
+ <body>
6
+ <div>
7
+
8
+ <p>Euler may be the most influential mathematician who ever lived (though some would make him second to Euclid); he ranks #77 on Michael Hart's famous list of the Most Influential Persons in History. His colleagues called him "Analysis Incarnate." Laplace, famous for denying credit to fellow mathematicians, once said "Read Euler: he is our master in everything." His notations and methods in many areas are in use to this day. Euler was the most prolific mathematician in history and is often judged to be the best algorist of all time. (The ranking #4 may seem too low for this supreme mathematician, but Gauss succeeded at proving several theorems which had stumped Euler.)</p>
9
+
10
+ <p>Just as Archimedes extended Euclid's geometry to marvelous heights, so Euler took marvelous advantage of the analysis of Newton and Leibniz: He gave the world modern trigonometry, pioneered (along with Lagrange) the calculus of variations, generalized and proved the Newton-Giraud formulae, etc. He was also supreme at discrete mathematics, inventing graph theory and generating functions. Euler was also a major figure in number theory: He proved that the sum of the reciprocals of primes less than x is approx. (<i>ln ln</i> x), invented the totient function and used it to generalize Fermat's Little Theorem, found both the largest then-known prime and the largest then-known perfect number, proved <b>e</b> to be irrational, proved that all even perfect numbers must have the Mersenne number form that Euclid had discovered 2000 years earlier, and much more. Euler was also first to prove several interesting theorems of geometry, including facts about the <i>9-point Feuerbach circle</i>; relationships among a triangle's altitudes, medians, and circumscribing and inscribing circles; and an expression for a tetrahedron's area in terms of its sides. Euler was first to explore topology, proving theorems about the <i>Euler characteristic</i>. Although noted as the first great "pure mathematician," Euler engineered a system of pumps, wrote on philosophy, and made important contributions to music theory, acoustics, optics, celestial motions and mechanics. He extended Newton's Laws of Motion to rotating rigid bodies; and developed the Euler-Bernoulli beam equation. On a lighter note, Euler constructed a particularly "magical" magic square.</p>
11
+
12
+ <p>Euler combined his brilliance with phenomenal concentration. He developed the first method to estimate the Moon's orbit (the three-body problem which had stumped Newton), and he settled an arithmetic dispute involving 50 decimal places of a long convergent series. Both these feats were accomplished when he was totally blind. (About this he said "Now I will have less distraction.") Fran&ccedil;ois Arago said that "Euler calculated without apparent effort, as men breathe, or as eagles sustain themselves in the wind."</p>
13
+
14
+ <p>Four of the most important constant symbols in mathematics (<b><i>&pi;</i></b>, <b><i>e</i></b>, <b><i>i</i></b> = &radic;-1, and <b><i>&gamma;</i></b> = 0.57721566...) were all introduced or popularized by Euler, along with operators like <b><i>&Sigma;</i></b>. He did important work with <i>Riemann's zeta function</i> &nbsp; <b>&zeta;(s) = &sum; k<sup>-s</sup></b> &nbsp; (although it was not then known with that name or notation); he anticipated the concept of analytic continuation by "proving" <b>&zeta;(-1) = 1+2+3+4+... = -1/12</b>. As a young student of the Bernoulli family, Euler discovered the striking identity &nbsp; <b><i>&pi;</i><sup>2</sup>/6 = &zeta;(2)</b> &nbsp; This catapulted Euler to instant fame, since the right-side infinite sum (<i>1 + 1/4 + 1/9 + 1/16 + ...)</i> was a famous problem of the time. Among many other famous and important identities, Euler proved the Pentagonal Number Theorem (a beautiful little result which has inspired a variety of discoveries), and the Euler Product Formula &nbsp; &nbsp; <b>&zeta;(s) = &prod;(1-p<sup>-s</sup>)<sup>-1</sup></b> &nbsp; where the right-side product is taken over all primes <i>p</i>. His most famous identity (which Richard Feynman called an "almost astounding ... jewel") unifies the trigonometric and exponential functions: &nbsp; <b><i>e</i><sup><i>i</i> x</sup> = <i>cos</i> x + <i>i</i> <i>sin</i> x</b>.</p>
15
+
16
+ <p>Some of Euler's greatest formulae can be combined into curious-looking formulae for <i>&pi;</i>: <b>&nbsp; &pi;<sup>2</sup> &nbsp; = &nbsp; - <i>log</i><sup>2</sup>(-1) &nbsp; = &nbsp; 6 &prod;<sub>p&isin;<i>Prime</i></sub>(1-p<sup>-2</sup>)<sup>-1/2</sup> </b>
17
+ </p>
18
+
19
+ </div>
20
+ </body>
21
+ </html>
data/spec/samples/mathematicians/leibniz.txt ADDED
@@ -0,0 +1,13 @@
1
+ Gottfried Leibniz (1646-1716)
2
+
3
+ Leibniz was one of the most brilliant and prolific intellectuals ever; and his influence in mathematics (especially his co-invention of the infinitesimal calculus) was immense. His childhood IQ has been estimated as second-highest in all of history, behind only Goethe. Descriptions which have been applied to Leibniz include "one of the two greatest universal geniuses" (da Vinci was the other); "the most important logician between Aristotle and Boole;" and the "Father of Applied Science." Leibniz described himself as "the most teachable of mortals."
4
+
5
+ Mathematics was just a self-taught sideline for Leibniz, who was a philosopher, lawyer, historian, diplomat and renowned inventor. Because he "wasted his youth" before learning mathematics, he probably ranked behind the Bernoullis as well as Newton in pure mathematical talent, and thus he may be the only mathematician among the Top Ten who was never the greatest living algorist or theorem prover. We won't try to summarize Leibniz' contributions to philosophy and diverse other fields including biology; as just three examples: he predicted the Earth's molten core, introduced the notion of subconscious mind, and built the first calculator that could do multiplication. (And his political influence may have been huge: he was a special consultant to both the Holy Roman and Russian Emperors, and helped arrange for the son of his patron Sophia Wittelsbach, only distantly in line for the British throne, to be crowned King George I of England.)
6
+
7
+ Leibniz pioneered the common discourse of mathematics, including its continuous, discrete, and symbolic aspects. (His ideas on symbolic logic weren't pursued and it was left to Boole to reinvent this almost two centuries later.) Mathematical innovations attributed to Leibniz include the symbols ∫, df(x)/dx; the concepts of matrix determinant and Gaussian elimination; the theory of geometric envelopes; and the binary number system. He invented more mathematical terms than anyone, including "function," "analysis situ," "variable," "abscissa," "parameter," and "coordinate." His works seem to anticipate cybernetics and information theory; and Mandelbrot acknowledged Leibniz' anticipation of self-similarity. Like Newton, Leibniz discovered The Fundamental Theorem of Calculus; his contribution to calculus was much more influential than Newton's, and his superior notation is used to this day. As Leibniz himself pointed out, since the concept of mathematical analysis was already known to ancient Greeks, the revolutionary invention was notation ("calculus"), because with "symbols [which] express the exact nature of a thing briefly ... the labor of thought is wonderfully diminished."
8
+
9
+ Leibniz' thoughts on mathematical physics had some influence. He developed laws of motion that gave different insights from those of Newton. His cosmology was opposed to that of Newton but, anticipating theories of Mach and Einstein, is more in accord with modern physics. Mathematical physicists influenced by Leibniz include not only Mach, but perhaps Hamilton and Poincaré themselves.
10
+
11
+ Although others found it independently (including perhaps Madhava three centuries earlier), Leibniz discovered and proved a striking identity for π:
12
+
13
+ π/4 = 1 - 1/3 + 1/5 - 1/7 + 1/9 - ...
data/spec/sandbox.rb ADDED
@@ -0,0 +1,5 @@
1
+ require_relative '../lib/treat'
2
+
3
+ c = Collection (Treat.spec + 'samples/mathematicians')
4
+ c.do :chunk, :segment, :tokenize, :tf_idf, :keywords
5
+ c.visualize :dot, :file => 'test2.dot', :remove_types => [:paragraph]
data/spec/token.rb ADDED
@@ -0,0 +1,109 @@
1
+ #encoding: utf-8
2
+ require_relative '../lib/treat'
3
+
4
+ describe Treat::Entities::Token do
5
+
6
+ describe "Buildable" do
7
+
8
+ describe "#build" do
9
+
10
+ context "when supplied with a word" do
11
+ it "creates a word with the text" do
12
+ t = Treat::Entities::Token.build('word')
13
+ t.should be_an_instance_of Treat::Entities::Word
14
+ t.to_s.should eql 'word'
15
+ end
16
+ end
17
+
18
+ context "when supplied with a number or a string representing a numerical quantity" do
19
+ it "creates a number" do
20
+ t = Treat::Entities::Token.build(2)
21
+ t2 = Treat::Entities::Token.build(2.2)
22
+ t3 = Treat::Entities::Token.build('2')
23
+ t4 = Treat::Entities::Token.build('2.2')
24
+ t.should be_an_instance_of Treat::Entities::Number
25
+ t2.should be_an_instance_of Treat::Entities::Number
26
+ t3.should be_an_instance_of Treat::Entities::Number
27
+ t4.should be_an_instance_of Treat::Entities::Number
28
+ t.to_i.should eql 2
29
+ t2.to_i.should eql 2
30
+ t3.to_i.should eql 2
31
+ t4.to_i.should eql 2
32
+ t.to_f.should eql 2.0
33
+ t2.to_f.should eql 2.2
34
+ t3.to_f.should eql 2.0
35
+ t4.to_f.should eql 2.2
36
+ end
37
+ end
38
+
39
+ context "when supplied with a punctuation character" do
40
+ it "creates a punctuation with the text" do
41
+ t = Treat::Entities::Token.build('.')
42
+ t.should be_an_instance_of Treat::Entities::Punctuation
43
+ end
44
+ end
45
+
46
+ context "when supplied with a symbol character" do
47
+ it "creates a symbol with the text" do
48
+ t = Treat::Entities::Token.build('¨')
49
+ t.should be_an_instance_of Treat::Entities::Symbol
50
+ end
51
+ end
52
+
53
+ end
54
+
55
+ end
56
+
57
+ describe "Lexicalizable" do
58
+
59
+ before do
60
+ @lexicalizers = Treat::Languages::English::Lexicalizers
61
+ end
62
+
63
+ describe "#tag" do
64
+
65
+ it "returns the tag of the token" do
66
+ @lexicalizers[:taggers].each do |t|
67
+ 'man'.tag(t).should eql 'NN'
68
+ '2'.tag(t).should eql 'CD'
69
+ '.'.tag(t).should eql '.'
70
+ '$'.tag(t).should eql '$'
71
+ end
72
+ end
73
+
74
+ end
75
+
76
+ describe "#category" do
77
+
78
+ context "when called on a word" do
79
+ it "returns the general part of speech of " +
80
+ "the word as a lowercase symbol" do
81
+ @lexicalizers[:categorizers].each do |c|
82
+ 'man'.category(c).should eql :noun
83
+ end
84
+ end
85
+ end
86
+
87
+ context "when called on a number" do
88
+ it "returns :number" do
89
+ @lexicalizers[:categorizers].each do |c|
90
+ '2'.category(c).should eql :number
91
+ end
92
+ end
93
+ end
94
+
95
+ context "when called on a punctuation or symbol" do
96
+ it "returns the type of punctuation or symbol " +
97
+ "as a lowercase identifier" do
98
+ @lexicalizers[:categorizers].each do |c|
99
+ '$'.category(c).should eql :dollar
100
+ '.'.category(c).should eql :period
101
+ end
102
+ end
103
+ end
104
+
105
+ end
106
+
107
+ end
108
+
109
+ end
data/spec/treat.rb ADDED
@@ -0,0 +1,52 @@
1
+ require_relative '../lib/treat'
2
+
3
+ describe Treat do
4
+
5
+ describe "Syntactic sugar:"
6
+
7
+ describe "#sweeten!, #unsweeten!" do
8
+
9
+ it "respectively turn on and off syntactic sugar and " +
10
+ "define/undefine entity builders as uppercase methods " +
11
+ "in the global namespace" do
12
+
13
+ Treat::Entities.list.each do |type|
14
+
15
+ next if type == :symbol
16
+
17
+ Treat.sweeten!
18
+ Treat.sweetened?.should eql true
19
+
20
+
21
+ Object.method_defined?(
22
+ :"#{type.to_s.capitalize}").
23
+ should eql true
24
+
25
+ Treat.unsweeten!
26
+ Treat.sweetened?.should eql false
27
+
28
+ Object.method_defined?(
29
+ :"#{type.to_s.capitalize}").
30
+ should eql false
31
+
32
+ end
33
+
34
+ end
35
+
36
+ end
37
+
38
+ describe "Paths:" do
39
+
40
+ paths = Treat::Paths
41
+ # Check IO for bin, files, tmp, models. Fix.
42
+ paths.each do |path, files|
43
+ describe "##{path}" do
44
+ it "provides the path to the #{files}" do
45
+ Treat.send(path).should be_instance_of String
46
+ end
47
+ end
48
+ end
49
+
50
+ end
51
+
52
+ end
data/spec/tree.rb ADDED
@@ -0,0 +1,117 @@
1
+ require_relative '../lib/treat'
2
+
3
+ describe Treat::Tree do
4
+
5
+ before :each do
6
+ @root = Treat::Tree::Node.new('root node', 'root')
7
+ @branch = Treat::Tree::Node.new('branch node', 'branch')
8
+ @sibling = Treat::Tree::Node.new('sibling node', 'sibling')
9
+ @leaf = Treat::Tree::Node.new('leaf node', 'leaf')
10
+ @root << @branch << @leaf
11
+ @root << @sibling
12
+
13
+ @leaf.link(@sibling, 'some dependency')
14
+ @leaf.set :some_feature, 'value'
15
+
16
+ end
17
+
18
+ describe "#[]" do
19
+ it "allows traversal of the tree by node ID" do
20
+ @root['branch'].should eql @branch
21
+ @root['branch']['leaf'].should eql @leaf
22
+ end
23
+ end
24
+
25
+ =begin
26
+ describe "#right, #left" do
27
+ it "return the right/left sibling from the same parent node"
28
+ @branch.right.should eql @sibling
29
+ @sibling.left.should eql @branch
30
+ end
31
+
32
+ describe "#remove!" do
33
+ it "removes a child by instance or ID and returns it" do
34
+ @root.remove!(@sibling).should eql @sibling
35
+ @root.size.should eql 3
36
+ @root.remove!(@branch.id).should eql @branch
37
+ @root.size.should eql 2
38
+ end
39
+ end
40
+
41
+ describe "#remove_all!" do
42
+ it "removes all a node's children"
43
+ @branch.remove_all!.size.should eql 0
44
+ end
45
+
46
+ =end
47
+
48
+ describe "#set(feature, value) and #get(feature)" do
49
+ it "set and get a feature in the @features hash" do
50
+ @root.set :foo, true
51
+ @root.get(:foo).should eql true
52
+ end
53
+ end
54
+
55
+ describe "#size" do
56
+ it "returns the total number of nodes in the tree" do
57
+ @root.size.should eql 4
58
+ end
59
+ end
60
+
61
+ describe "#id" do
62
+ it "returns the unique ID of the node" do
63
+ @root.id.should eql 'root'
64
+ @branch.id.should eql 'branch'
65
+ @leaf.id.should eql 'leaf'
66
+ end
67
+ end
68
+
69
+ describe "#value" do
70
+ it "contains the string value of the node" do
71
+ @root.value.should eql 'root node'
72
+ @branch.value.should eql 'branch node'
73
+ @leaf.value.should eql 'leaf node'
74
+ end
75
+ end
76
+
77
+ describe "#has_children?" do
78
+ it "tells whether the node has children or not" do
79
+ @root.has_children?.should eql true
80
+ @branch.has_children?.should eql true
81
+ @leaf.has_children?.should eql false
82
+ end
83
+ end
84
+
85
+ describe "#has_parent?" do
86
+ it "tells whether the node has a parent or not" do
87
+ @root.has_parent?.should eql false
88
+ @branch.has_parent?.should eql true
89
+ @leaf.has_parent?.should eql true
90
+ end
91
+ end
92
+
93
+ describe "#has_children?" do
94
+ it "tells whether the node has children or not" do
95
+ @root.has_children?.should eql true
96
+ @branch.has_children?.should eql true
97
+ @leaf.has_children?.should eql false
98
+ end
99
+ end
100
+
101
+ describe "#has_features?" do
102
+ it "tells whether the node has features or not" do
103
+ @root.has_features?.should eql false
104
+ @branch.has_features?.should eql false
105
+ @leaf.has_features?.should eql true
106
+ end
107
+ end
108
+
109
+ describe "#has_dependencies?" do
110
+ it "tells whether the node has dependencies or not" do
111
+ @root.has_dependencies?.should eql false
112
+ @branch.has_dependencies?.should eql false
113
+ @leaf.has_dependencies?.should eql true
114
+ end
115
+ end
116
+
117
+ end