treat 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147)
  1. data/INSTALL +0 -0
  2. data/LICENSE +28 -0
  3. data/README +0 -0
  4. data/TODO +67 -0
  5. data/bin/INFO +1 -0
  6. data/examples/benchmark.rb +81 -0
  7. data/examples/keywords.rb +60 -0
  8. data/examples/texts/bugged_out.txt +26 -0
  9. data/examples/texts/half_cocked_basel.txt +16 -0
  10. data/examples/texts/hedge_funds.txt +24 -0
  11. data/examples/texts/hose_and_dry.txt +19 -0
  12. data/examples/texts/hungarys_troubles.txt +46 -0
  13. data/examples/texts/indias_slowdown.txt +15 -0
  14. data/examples/texts/merkozy_rides_again.txt +24 -0
  15. data/examples/texts/prada_is_not_walmart.txt +9 -0
  16. data/examples/texts/republican_nomination.txt +26 -0
  17. data/examples/texts/to_infinity_and_beyond.txt +15 -0
  18. data/lib/treat.rb +91 -0
  19. data/lib/treat/buildable.rb +115 -0
  20. data/lib/treat/categories.rb +29 -0
  21. data/lib/treat/category.rb +28 -0
  22. data/lib/treat/delegatable.rb +90 -0
  23. data/lib/treat/detectors.rb +28 -0
  24. data/lib/treat/detectors/encoding/native.rb +12 -0
  25. data/lib/treat/detectors/encoding/r_chardet19.rb +24 -0
  26. data/lib/treat/detectors/format/file.rb +36 -0
  27. data/lib/treat/detectors/language/language_detector.rb +19 -0
  28. data/lib/treat/detectors/language/what_language.rb +29 -0
  29. data/lib/treat/entities.rb +52 -0
  30. data/lib/treat/entities/collection.rb +19 -0
  31. data/lib/treat/entities/constituents.rb +15 -0
  32. data/lib/treat/entities/document.rb +11 -0
  33. data/lib/treat/entities/entity.rb +242 -0
  34. data/lib/treat/entities/sentence.rb +8 -0
  35. data/lib/treat/entities/text.rb +7 -0
  36. data/lib/treat/entities/tokens.rb +37 -0
  37. data/lib/treat/entities/zones.rb +17 -0
  38. data/lib/treat/exception.rb +5 -0
  39. data/lib/treat/extractors.rb +41 -0
  40. data/lib/treat/extractors/key_sentences/topics_frequency.rb +49 -0
  41. data/lib/treat/extractors/named_entity/abner.rb +20 -0
  42. data/lib/treat/extractors/named_entity/stanford.rb +174 -0
  43. data/lib/treat/extractors/statistics/frequency.rb +22 -0
  44. data/lib/treat/extractors/statistics/frequency_of.rb +17 -0
  45. data/lib/treat/extractors/statistics/position_in.rb +13 -0
  46. data/lib/treat/extractors/statistics/transition_matrix.rb +105 -0
  47. data/lib/treat/extractors/statistics/transition_probability.rb +53 -0
  48. data/lib/treat/extractors/time/chronic.rb +12 -0
  49. data/lib/treat/extractors/time/native.rb +12 -0
  50. data/lib/treat/extractors/time/nickel.rb +45 -0
  51. data/lib/treat/extractors/topic_words/lda.rb +71 -0
  52. data/lib/treat/extractors/topic_words/lda/data.dat +46 -0
  53. data/lib/treat/extractors/topic_words/lda/wiki.yml +121 -0
  54. data/lib/treat/extractors/topics/reuters.rb +91 -0
  55. data/lib/treat/extractors/topics/reuters/industry.xml +2717 -0
  56. data/lib/treat/extractors/topics/reuters/region.xml +13585 -0
  57. data/lib/treat/extractors/topics/reuters/topics.xml +17977 -0
  58. data/lib/treat/feature.rb +53 -0
  59. data/lib/treat/formatters.rb +44 -0
  60. data/lib/treat/formatters/cleaners/html.rb +17 -0
  61. data/lib/treat/formatters/readers/autoselect.rb +35 -0
  62. data/lib/treat/formatters/readers/gocr.rb +24 -0
  63. data/lib/treat/formatters/readers/html.rb +13 -0
  64. data/lib/treat/formatters/readers/ocropus.rb +31 -0
  65. data/lib/treat/formatters/readers/pdf.rb +17 -0
  66. data/lib/treat/formatters/readers/txt.rb +15 -0
  67. data/lib/treat/formatters/serializers/xml.rb +48 -0
  68. data/lib/treat/formatters/serializers/yaml.rb +15 -0
  69. data/lib/treat/formatters/serializers/yaml/helper.rb +96 -0
  70. data/lib/treat/formatters/unserializers/autoselect.rb +19 -0
  71. data/lib/treat/formatters/unserializers/xml.rb +79 -0
  72. data/lib/treat/formatters/unserializers/yaml.rb +15 -0
  73. data/lib/treat/formatters/visualizers/dot.rb +73 -0
  74. data/lib/treat/formatters/visualizers/html.rb +12 -0
  75. data/lib/treat/formatters/visualizers/inspect.rb +16 -0
  76. data/lib/treat/formatters/visualizers/short_value.rb +14 -0
  77. data/lib/treat/formatters/visualizers/standoff.rb +41 -0
  78. data/lib/treat/formatters/visualizers/tree.rb +28 -0
  79. data/lib/treat/formatters/visualizers/txt.rb +31 -0
  80. data/lib/treat/group.rb +96 -0
  81. data/lib/treat/inflectors.rb +50 -0
  82. data/lib/treat/inflectors/cardinal_words/linguistics.rb +45 -0
  83. data/lib/treat/inflectors/conjugators/linguistics.rb +30 -0
  84. data/lib/treat/inflectors/declensors/en.rb +18 -0
  85. data/lib/treat/inflectors/declensors/linguistics.rb +30 -0
  86. data/lib/treat/inflectors/lemmatizers/e_lemma.rb +12 -0
  87. data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +213 -0
  88. data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +68 -0
  89. data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +6 -0
  90. data/lib/treat/inflectors/ordinal_words/linguistics.rb +21 -0
  91. data/lib/treat/inflectors/stemmers/porter.rb +158 -0
  92. data/lib/treat/inflectors/stemmers/porter_c.rb +23 -0
  93. data/lib/treat/inflectors/stemmers/uea.rb +30 -0
  94. data/lib/treat/lexicalizers.rb +49 -0
  95. data/lib/treat/lexicalizers/category/from_tag.rb +30 -0
  96. data/lib/treat/lexicalizers/linkages/naive.rb +63 -0
  97. data/lib/treat/lexicalizers/synsets/rita_wn.rb +23 -0
  98. data/lib/treat/lexicalizers/synsets/wordnet.rb +72 -0
  99. data/lib/treat/lexicalizers/tag/brill.rb +101 -0
  100. data/lib/treat/lexicalizers/tag/lingua.rb +114 -0
  101. data/lib/treat/lexicalizers/tag/stanford.rb +86 -0
  102. data/lib/treat/processors.rb +45 -0
  103. data/lib/treat/processors/chunkers/txt.rb +27 -0
  104. data/lib/treat/processors/parsers/enju.rb +214 -0
  105. data/lib/treat/processors/parsers/stanford.rb +60 -0
  106. data/lib/treat/processors/segmenters/punkt.rb +48 -0
  107. data/lib/treat/processors/segmenters/stanford.rb +45 -0
  108. data/lib/treat/processors/segmenters/tactful.rb +34 -0
  109. data/lib/treat/processors/tokenizers/macintyre.rb +76 -0
  110. data/lib/treat/processors/tokenizers/multilingual.rb +31 -0
  111. data/lib/treat/processors/tokenizers/perl.rb +96 -0
  112. data/lib/treat/processors/tokenizers/punkt.rb +42 -0
  113. data/lib/treat/processors/tokenizers/stanford.rb +33 -0
  114. data/lib/treat/processors/tokenizers/tactful.rb +59 -0
  115. data/lib/treat/proxies.rb +66 -0
  116. data/lib/treat/registrable.rb +26 -0
  117. data/lib/treat/resources.rb +10 -0
  118. data/lib/treat/resources/categories.rb +18 -0
  119. data/lib/treat/resources/delegates.rb +96 -0
  120. data/lib/treat/resources/dependencies.rb +0 -0
  121. data/lib/treat/resources/edges.rb +8 -0
  122. data/lib/treat/resources/formats.rb +23 -0
  123. data/lib/treat/resources/languages.rb +86 -0
  124. data/lib/treat/resources/languages.txt +504 -0
  125. data/lib/treat/resources/tags.rb +393 -0
  126. data/lib/treat/sugar.rb +43 -0
  127. data/lib/treat/tree.rb +174 -0
  128. data/lib/treat/utilities.rb +127 -0
  129. data/lib/treat/visitable.rb +27 -0
  130. data/test/profile.rb +2 -0
  131. data/test/tc_detectors.rb +27 -0
  132. data/test/tc_entity.rb +105 -0
  133. data/test/tc_extractors.rb +48 -0
  134. data/test/tc_formatters.rb +46 -0
  135. data/test/tc_inflectors.rb +39 -0
  136. data/test/tc_lexicalizers.rb +39 -0
  137. data/test/tc_processors.rb +36 -0
  138. data/test/tc_resources.rb +27 -0
  139. data/test/tc_treat.rb +64 -0
  140. data/test/tc_tree.rb +60 -0
  141. data/test/tests.rb +19 -0
  142. data/test/texts.rb +20 -0
  143. data/test/texts/english/long.html +24 -0
  144. data/test/texts/english/long.txt +22 -0
  145. data/test/texts/english/medium.txt +5 -0
  146. data/test/texts/english/short.txt +3 -0
  147. metadata +412 -0
@@ -0,0 +1,36 @@
1
+ module Treat
2
+ module Tests
3
+ class TestProcessors < Test::Unit::TestCase
4
+
5
+ def setup
6
+ @doc = Treat::Tests::EnglishShortDoc
7
+ end
8
+
9
+ def test_tokenizers
10
+ assert_nothing_raised { @doc.tokenize(:macintyre) }
11
+ assert_nothing_raised { @doc.tokenize(:multilingual) }
12
+ assert_nothing_raised { @doc.tokenize(:perl) }
13
+ assert_nothing_raised { @doc.tokenize(:punkt) }
14
+ assert_nothing_raised { @doc.tokenize(:stanford) }
15
+ assert_nothing_raised { @doc.tokenize(:tactful) }
16
+ end
17
+
18
+ def test_segmenters
19
+ assert_nothing_raised { @doc.segment(:punkt) }
20
+ assert_nothing_raised { @doc.segment(:stanford) }
21
+ assert_nothing_raised { @doc.segment(:tactful) }
22
+ end
23
+
24
+ def test_chunkers
25
+ assert_nothing_raised { @doc.chunk(:txt) }
26
+ end
27
+
28
+ def test_parsers
29
+ assert_nothing_raised { @doc.segment.parse(:enju) }
30
+ assert_nothing_raised { @doc.segment.parse(:stanford) }
31
+ end
32
+
33
+ end
34
+
35
+ end
36
+ end
@@ -0,0 +1,27 @@
1
+ module Treat
2
+ module Tests
3
+ class TestResources < Test::Unit::TestCase
4
+
5
+ def test_languages
6
+ assert_equal :eng, Treat::Resources::Languages.find(:english, 2)
7
+ assert_equal :en, Treat::Resources::Languages.find(:english, 1)
8
+ assert_equal :english, Treat::Resources::Languages.describe(:eng)
9
+ assert_equal :english, Treat::Resources::Languages.describe(:en)
10
+ end
11
+
12
+ def test_tags
13
+
14
+ end
15
+
16
+ def test_dependencies
17
+
18
+ end
19
+
20
+ def test_edges
21
+
22
+ end
23
+
24
+ end
25
+
26
+ end
27
+ end
@@ -0,0 +1,64 @@
1
+ module Treat
2
+ module Tests
3
+ class TestTreat < Test::Unit::TestCase
4
+
5
+ def test_edulcoration
6
+ Treat.edulcorate
7
+ assert_equal true, Treat.edulcorated?
8
+ Treat::Entities.list.each do |klass|
9
+ next if klass == :symbol
10
+ assert_nothing_raised do
11
+ begin
12
+ Object.send(:"#{klass.to_s.capitalize}")
13
+ rescue Treat::Exception
14
+ next
15
+ rescue
16
+ raise
17
+ end
18
+ end
19
+ end
20
+ Treat.unedulcorate
21
+ assert_equal false, Treat.edulcorated?
22
+ Treat::Entities.list.each do |klass|
23
+ next if klass == :symbol
24
+ assert_raise(NoMethodError) do
25
+ Object.send(:"#{klass.to_s.capitalize}")
26
+ end
27
+ end
28
+ end
29
+
30
+ def test_modules_loaded?
31
+ ['exception',
32
+ 'utilities',
33
+ 'resources',
34
+ 'entities',
35
+ 'feature',
36
+ 'category',
37
+ 'group',
38
+ 'detectors',
39
+ 'formatters',
40
+ 'processors',
41
+ 'lexicalizers',
42
+ 'extractors',
43
+ 'inflectors',
44
+ 'proxies'].each do |klass|
45
+ assert_nothing_raised do
46
+ Treat.const_get klass.capitalize
47
+ end
48
+ end
49
+ end
50
+
51
+ def test_paths
52
+ assert_not_nil Treat.lib
53
+ assert_not_nil Treat.bin
54
+ assert_not_nil Treat.test
55
+ assert_not_nil Treat.tmp
56
+ end
57
+
58
+ def test_file_permissions
59
+ assert_equal true, File.writable?(Treat.lib + '/../tmp')
60
+ end
61
+
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,60 @@
1
+ module Treat
2
+ module Tests
3
+ class TestTree < Test::Unit::TestCase
4
+ def setup
5
+ @root = Treat::Tree::Node.new('root node', 'root')
6
+ @branch = Treat::Tree::Node.new('branch node', 'branch')
7
+ @sibling = Treat::Tree::Node.new('sibling node', 'sibling')
8
+ @leaf = Treat::Tree::Node.new('leaf node', 'leaf')
9
+ @root << @branch << @leaf
10
+ @root << @sibling
11
+ @leaf.associate(@sibling, 'some dependency')
12
+ end
13
+ def test_branching
14
+ assert_equal 2, @root.children.size
15
+ assert_equal 4, @root.size
16
+
17
+ assert_equal @branch, @root['branch']
18
+ assert_equal @leaf, @root['branch']['leaf']
19
+ assert_equal @sibling, @branch.right
20
+
21
+ assert_equal @root, @root['branch'].parent
22
+ assert_equal [@sibling], @branch.siblings
23
+
24
+ assert_equal @root, @leaf.root
25
+ end
26
+ def test_removal
27
+ assert_equal 1, @branch.remove_all!.size
28
+ assert_equal @sibling, @root.remove!(@sibling)
29
+ assert_equal @branch, @root.remove!(@branch.id)
30
+ end
31
+ def test_properties
32
+
33
+ assert_equal 'root', @root.id
34
+ assert_equal 'branch', @branch.id
35
+ assert_equal 'leaf', @leaf.id
36
+
37
+ assert_equal 'root node', @root.value
38
+ assert_equal 'branch node', @branch.value
39
+ assert_equal 'leaf node', @leaf.value
40
+
41
+ assert_equal false, @root.has_features?
42
+ assert_equal false, @branch.has_features?
43
+ assert_equal false, @leaf.has_features?
44
+
45
+ assert_equal true, @root.has_children?
46
+ assert_equal true, @branch.has_children?
47
+ assert_equal false, @leaf.has_children?
48
+
49
+ assert_equal false, @root.has_parent?
50
+ assert_equal true, @branch.has_parent?
51
+ assert_equal true, @leaf.has_parent?
52
+
53
+ assert_equal false, @root.has_edges?
54
+ assert_equal false, @branch.has_edges?
55
+ assert_equal true, @leaf.has_edges?
56
+
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,19 @@
1
+ require 'test/unit'
2
+
3
+ # $LOAD_PATH << '/ruby/treat/test/' # Remove for production
4
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
5
+
6
+ require 'treat'
7
+ require 'texts'
8
+
9
+ require 'tc_treat'
10
+ require 'tc_tree'
11
+ require 'tc_entity'
12
+ require 'tc_resources'
13
+
14
+ require 'tc_detectors'
15
+ require 'tc_formatters'
16
+ require 'tc_inflectors'
17
+ require 'tc_lexicalizers'
18
+ require 'tc_processors'
19
+ require 'tc_extractors'
@@ -0,0 +1,20 @@
1
+ module Treat
2
+ module Tests
3
+
4
+ EnglishHtmlDoc = Treat::Entities::Document(Treat.test + 'texts/english/long.html')
5
+ EnglishLongDoc = Treat::Entities::Document(Treat.test + 'texts/english/long.txt')
6
+ EnglishMediumDoc = Treat::Entities::Document(Treat.test + 'texts/english/medium.txt')
7
+ EnglishShortDoc = Treat::Entities::Document(Treat.test + 'texts/english/short.txt')
8
+
9
+ EnglishTime = Treat::Entities::Phrase('5 PM')
10
+ EnglishDate = Treat::Entities::Phrase('this tuesday')
11
+
12
+ EnglishSentence = Treat::Entities::Sentence('The quick brown fox jumped over the lazy dog.')
13
+
14
+ EnglishVerb = Treat::Entities::Word('run'); EnglishVerb.set :category, :verb
15
+ EnglishWord = Treat::Entities::Word('running')
16
+ EnglishNoun = Treat::Entities::Word('captain')
17
+ Number = Treat::Entities::Number(20)
18
+
19
+ end
20
+ end
@@ -0,0 +1,24 @@
1
+ <html>
2
+ A short biography of Michel Foucault
3
+
4
+ Michel Foucault, born Paul-Michel Foucault (15 October 1926 – 25 June 1984), was a French philosopher, social theorist and historian of ideas. He held a chair at the Collège de France with the title "History of Systems of Thought," and lectured at the University at Buffalo and the University of California, Berkeley.
5
+
6
+ Early life
7
+
8
+ Paul-Michel Foucault was born on 15 October 1926 in Poitiers, France, to a notable provincial family. His father, Paul Foucault, was an eminent surgeon and hoped his son would join him in the profession. His early education was a mix of success and mediocrity until he attended the Jesuit Collège Saint-Stanislas, where he excelled. During this period, Poitiers was part of Vichy France and later came under German occupation. Foucault learned philosophy with Louis Girard.
9
+ After World War II, Foucault was admitted to the prestigious École Normale Supérieure (rue d'Ulm), the traditional gateway to an academic career in the humanities in France.
10
+
11
+ The École Normale Supérieure
12
+
13
+ Foucault's personal life during the École Normale was difficult — he suffered from acute depression due to distress over his homosexuality and made several suicide attempts. As a result, he was taken to see a psychiatrist. During this time, Foucault became fascinated with psychology. He earned a licence (degree equivalent to BA) in psychology, a very new qualification in France at the time, in addition to a degree in philosophy, in 1952. He was involved in clinical psychology, which exposed him to thinkers such as Ludwig Binswanger.
14
+
15
+ Foucault was a member of the French Communist Party from 1950 to 1953. He was inducted into the party by his mentor Louis Althusser, but soon became disillusioned with both the politics and the philosophy of the party. Historian Emmanuel Le Roy Ladurie reported that Foucault never actively participated in his cell, unlike many of his fellow party members.
16
+
17
+ Early career
18
+
19
+ Foucault failed at the agrégation in 1950 but took it again and succeeded the following year. After a brief period lecturing at the École Normale, he took up a position at the Université Lille Nord de France, where from 1953 to 1954 he taught psychology. In 1954 Foucault published his first book, Maladie mentale et personnalité, a work he later disavowed. At this point, Foucault was not interested in a teaching career, and undertook a lengthy exile from France. In 1954 he served France as a cultural delegate to the University of Uppsala in Sweden (a position arranged for him by Georges Dumézil, who was to become a friend and mentor). He submitted his doctoral thesis in Uppsala, but it was rejected there. In 1958 Foucault left Uppsala and briefly held positions at Warsaw University and at the University of Hamburg.
20
+
21
+ Foucault returned to France in 1960 to complete his doctorate and take up a post in philosophy at the University of Clermont-Ferrand. There he met philosopher Daniel Defert, who would become his partner of twenty years. In 1961 he earned his doctorate by submitting two theses (as is customary in France): a "major" thesis entitled Folie et déraison: Histoire de la folie à l'âge classique (Madness and Insanity: History of Madness in the Classical Age) and a "secondary" thesis that involved a translation of, and commentary on Kant's Anthropology from a Pragmatic Point of View. Folie et déraison (Madness and Insanity – published in an abridged edition in English as Madness and Civilization and finally published unabridged as "History of Madness" by Routledge in 2006) was extremely well received. Foucault continued a vigorous publishing schedule. In 1963 he published Naissance de la Clinique (Birth of the Clinic), Raymond Roussel, and a reissue of his 1954 volume (now entitled Maladie mentale et psychologie or, in English, "Mental Illness and Psychology"), which again, he later disavowed.
22
+
23
+ After Defert was posted to Tunisia for his military service, Foucault moved to a position at the University of Tunis in 1965. He published Les Mots et les choses (The Order of Things) during the height of interest in structuralism in 1966, and Foucault was quickly grouped with scholars such as Jacques Lacan, Claude Lévi-Strauss, and Roland Barthes as the newest, latest wave of thinkers set to topple the existentialism popularized by Jean-Paul Sartre. Foucault made a number of skeptical comments about Marxism, which outraged a number of left wing critics, but later firmly rejected the "structuralist" label. He was still in Tunis during the May 1968 student riots, where he was profoundly affected by a local student revolt earlier in the same year. In the Autumn of 1968 he returned to France, where he published L'archéologie du savoir (The Archaeology of Knowledge) – a methodological treatise that included a response to his critics – in 1969.
24
+ </html>
@@ -0,0 +1,22 @@
1
+ A short biography of Michel Foucault
2
+
3
+ Michel Foucault, born Paul-Michel Foucault (15 October 1926 – 25 June 1984), was a French philosopher, social theorist and historian of ideas. He held a chair at the Collège de France with the title "History of Systems of Thought," and lectured at the University at Buffalo and the University of California, Berkeley.
4
+
5
+ Early life
6
+
7
+ Paul-Michel Foucault was born on 15 October 1926 in Poitiers, France, to a notable provincial family. His father, Paul Foucault, was an eminent surgeon and hoped his son would join him in the profession. His early education was a mix of success and mediocrity until he attended the Jesuit Collège Saint-Stanislas, where he excelled. During this period, Poitiers was part of Vichy France and later came under German occupation. Foucault learned philosophy with Louis Girard.
8
+ After World War II, Foucault was admitted to the prestigious École Normale Supérieure (rue d'Ulm), the traditional gateway to an academic career in the humanities in France.
9
+
10
+ The École Normale Supérieure
11
+
12
+ Foucault's personal life during the École Normale was difficult — he suffered from acute depression due to distress over his homosexuality and made several suicide attempts. As a result, he was taken to see a psychiatrist. During this time, Foucault became fascinated with psychology. He earned a licence (degree equivalent to BA) in psychology, a very new qualification in France at the time, in addition to a degree in philosophy, in 1952. He was involved in clinical psychology, which exposed him to thinkers such as Ludwig Binswanger.
13
+
14
+ Foucault was a member of the French Communist Party from 1950 to 1953. He was inducted into the party by his mentor Louis Althusser, but soon became disillusioned with both the politics and the philosophy of the party. Historian Emmanuel Le Roy Ladurie reported that Foucault never actively participated in his cell, unlike many of his fellow party members.
15
+
16
+ Early career
17
+
18
+ Foucault failed at the agrégation in 1950 but took it again and succeeded the following year. After a brief period lecturing at the École Normale, he took up a position at the Université Lille Nord de France, where from 1953 to 1954 he taught psychology. In 1954 Foucault published his first book, Maladie mentale et personnalité, a work he later disavowed. At this point, Foucault was not interested in a teaching career, and undertook a lengthy exile from France. In 1954 he served France as a cultural delegate to the University of Uppsala in Sweden (a position arranged for him by Georges Dumézil, who was to become a friend and mentor). He submitted his doctoral thesis in Uppsala, but it was rejected there. In 1958 Foucault left Uppsala and briefly held positions at Warsaw University and at the University of Hamburg.
19
+
20
+ Foucault returned to France in 1960 to complete his doctorate and take up a post in philosophy at the University of Clermont-Ferrand. There he met philosopher Daniel Defert, who would become his partner of twenty years. In 1961 he earned his doctorate by submitting two theses (as is customary in France): a "major" thesis entitled Folie et déraison: Histoire de la folie à l'âge classique (Madness and Insanity: History of Madness in the Classical Age) and a "secondary" thesis that involved a translation of, and commentary on Kant's Anthropology from a Pragmatic Point of View. Folie et déraison (Madness and Insanity – published in an abridged edition in English as Madness and Civilization and finally published unabridged as "History of Madness" by Routledge in 2006) was extremely well received. Foucault continued a vigorous publishing schedule. In 1963 he published Naissance de la Clinique (Birth of the Clinic), Raymond Roussel, and a reissue of his 1954 volume (now entitled Maladie mentale et psychologie or, in English, "Mental Illness and Psychology"), which again, he later disavowed.
21
+
22
+ After Defert was posted to Tunisia for his military service, Foucault moved to a position at the University of Tunis in 1965. He published Les Mots et les choses (The Order of Things) during the height of interest in structuralism in 1966, and Foucault was quickly grouped with scholars such as Jacques Lacan, Claude Lévi-Strauss, and Roland Barthes as the newest, latest wave of thinkers set to topple the existentialism popularized by Jean-Paul Sartre. Foucault made a number of skeptical comments about Marxism, which outraged a number of left wing critics, but later firmly rejected the "structuralist" label. He was still in Tunis during the May 1968 student riots, where he was profoundly affected by a local student revolt earlier in the same year. In the Autumn of 1968 he returned to France, where he published L'archéologie du savoir (The Archaeology of Knowledge) – a methodological treatise that included a response to his critics – in 1969.
@@ -0,0 +1,5 @@
1
+ A short biography of Michel Foucault
2
+
3
+ Michel Foucault, born Paul-Michel Foucault (15 October 1926 – 25 June 1984), was a French philosopher, social theorist and historian of ideas. He held a chair at the Collège de France with the title "History of Systems of Thought," and lectured at the University at Buffalo and the University of California, Berkeley.
4
+
5
+ Foucault is best known for his critical studies of social institutions, most notably psychiatry, medicine, the human sciences and the prison system, as well as for his work on the history of human sexuality. His writings on power, knowledge, and discourse have been widely influential in academic circles. In the 1960s Foucault was associated with structuralism, a movement from which he distanced himself. Foucault also rejected the poststructuralist and postmodernist labels later attributed to him, preferring to classify his thought as a critical history of modernity rooted in Immanuel Kant. Foucault's project was particularly influenced by Nietzsche, his "genealogy of knowledge" being a direct allusion to Nietzsche's "genealogy of morality". In a late interview he definitively stated: "I am a Nietzschean."
@@ -0,0 +1,3 @@
1
+ "A short biography of Michel Foucault
2
+
3
+ Michel Foucault, born Paul-Michel Foucault (15 October 1926 – 25 June 1984), was a French philosopher, social theorist and historian of ideas. He held a chair at the Collège de France with the title "History of Systems of Thought," and lectured at the University at Buffalo and the University of California, Berkeley.
metadata ADDED
@@ -0,0 +1,412 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: treat
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Louis Mullie
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-01-24 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rjb
16
+ requirement: &70198460348640 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70198460348640
25
+ - !ruby/object:Gem::Dependency
26
+ name: nokogiri
27
+ requirement: &70198460358020 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70198460358020
36
+ - !ruby/object:Gem::Dependency
37
+ name: chronic
38
+ requirement: &70198460355160 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70198460355160
47
+ - !ruby/object:Gem::Dependency
48
+ name: hpricot
49
+ requirement: &70198460353240 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: *70198460353240
58
+ - !ruby/object:Gem::Dependency
59
+ name: psych
60
+ requirement: &70198460351340 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :runtime
67
+ prerelease: false
68
+ version_requirements: *70198460351340
69
+ - !ruby/object:Gem::Dependency
70
+ name: rchardet19
71
+ requirement: &70198460364300 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: *70198460364300
80
+ - !ruby/object:Gem::Dependency
81
+ name: whatlanguage
82
+ requirement: &70198460360840 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ type: :runtime
89
+ prerelease: false
90
+ version_requirements: *70198460360840
91
+ - !ruby/object:Gem::Dependency
92
+ name: wordnet
93
+ requirement: &70198460374520 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ type: :runtime
100
+ prerelease: false
101
+ version_requirements: *70198460374520
102
+ - !ruby/object:Gem::Dependency
103
+ name: rbtagger
104
+ requirement: &70198460372420 !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ type: :runtime
111
+ prerelease: false
112
+ version_requirements: *70198460372420
113
+ - !ruby/object:Gem::Dependency
114
+ name: engtagger
115
+ requirement: &70198460370340 !ruby/object:Gem::Requirement
116
+ none: false
117
+ requirements:
118
+ - - ! '>='
119
+ - !ruby/object:Gem::Version
120
+ version: '0'
121
+ type: :runtime
122
+ prerelease: false
123
+ version_requirements: *70198460370340
124
+ - !ruby/object:Gem::Dependency
125
+ name: punkt-segmenter
126
+ requirement: &70198460383060 !ruby/object:Gem::Requirement
127
+ none: false
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: *70198460383060
135
+ - !ruby/object:Gem::Dependency
136
+ name: tokenizer
137
+ requirement: &70198460380700 !ruby/object:Gem::Requirement
138
+ none: false
139
+ requirements:
140
+ - - ! '>='
141
+ - !ruby/object:Gem::Version
142
+ version: '0'
143
+ type: :runtime
144
+ prerelease: false
145
+ version_requirements: *70198460380700
146
+ - !ruby/object:Gem::Dependency
147
+ name: tactful_tokenizer
148
+ requirement: &70198460378320 !ruby/object:Gem::Requirement
149
+ none: false
150
+ requirements:
151
+ - - ! '>='
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ type: :runtime
155
+ prerelease: false
156
+ version_requirements: *70198460378320
157
+ - !ruby/object:Gem::Dependency
158
+ name: english
159
+ requirement: &70198460375900 !ruby/object:Gem::Requirement
160
+ none: false
161
+ requirements:
162
+ - - ! '>='
163
+ - !ruby/object:Gem::Version
164
+ version: '0'
165
+ type: :runtime
166
+ prerelease: false
167
+ version_requirements: *70198460375900
168
+ - !ruby/object:Gem::Dependency
169
+ name: linguistics
170
+ requirement: &70198460391000 !ruby/object:Gem::Requirement
171
+ none: false
172
+ requirements:
173
+ - - ! '>='
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ type: :runtime
177
+ prerelease: false
178
+ version_requirements: *70198460391000
179
+ - !ruby/object:Gem::Dependency
180
+ name: ruby-stemmer
181
+ requirement: &70198460389700 !ruby/object:Gem::Requirement
182
+ none: false
183
+ requirements:
184
+ - - ! '>='
185
+ - !ruby/object:Gem::Version
186
+ version: '0'
187
+ type: :runtime
188
+ prerelease: false
189
+ version_requirements: *70198460389700
190
+ - !ruby/object:Gem::Dependency
191
+ name: uea-stemmer
192
+ requirement: &70198460388460 !ruby/object:Gem::Requirement
193
+ none: false
194
+ requirements:
195
+ - - ! '>='
196
+ - !ruby/object:Gem::Version
197
+ version: '0'
198
+ type: :runtime
199
+ prerelease: false
200
+ version_requirements: *70198460388460
201
+ - !ruby/object:Gem::Dependency
202
+ name: lda-ruby
203
+ requirement: &70198460387460 !ruby/object:Gem::Requirement
204
+ none: false
205
+ requirements:
206
+ - - ! '>='
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ type: :runtime
210
+ prerelease: false
211
+ version_requirements: *70198460387460
212
+ - !ruby/object:Gem::Dependency
213
+ name: nickel
214
+ requirement: &70198460386440 !ruby/object:Gem::Requirement
215
+ none: false
216
+ requirements:
217
+ - - ! '>='
218
+ - !ruby/object:Gem::Version
219
+ version: '0'
220
+ type: :runtime
221
+ prerelease: false
222
+ version_requirements: *70198460386440
223
+ - !ruby/object:Gem::Dependency
224
+ name: unprof
225
+ requirement: &70198460385440 !ruby/object:Gem::Requirement
226
+ none: false
227
+ requirements:
228
+ - - ! '>='
229
+ - !ruby/object:Gem::Version
230
+ version: '0'
231
+ type: :development
232
+ prerelease: false
233
+ version_requirements: *70198460385440
234
+ description: ! ' Treat is a toolkit for text retrieval, information extraction and
235
+ natural language processing. '
236
+ email:
237
+ - louis.mullie@gmail.com
238
+ executables: []
239
+ extensions: []
240
+ extra_rdoc_files: []
241
+ files:
242
+ - lib/treat/buildable.rb
243
+ - lib/treat/categories.rb
244
+ - lib/treat/category.rb
245
+ - lib/treat/delegatable.rb
246
+ - lib/treat/detectors/encoding/native.rb
247
+ - lib/treat/detectors/encoding/r_chardet19.rb
248
+ - lib/treat/detectors/format/file.rb
249
+ - lib/treat/detectors/language/language_detector.rb
250
+ - lib/treat/detectors/language/what_language.rb
251
+ - lib/treat/detectors.rb
252
+ - lib/treat/entities/collection.rb
253
+ - lib/treat/entities/constituents.rb
254
+ - lib/treat/entities/document.rb
255
+ - lib/treat/entities/entity.rb
256
+ - lib/treat/entities/sentence.rb
257
+ - lib/treat/entities/text.rb
258
+ - lib/treat/entities/tokens.rb
259
+ - lib/treat/entities/zones.rb
260
+ - lib/treat/entities.rb
261
+ - lib/treat/exception.rb
262
+ - lib/treat/extractors/key_sentences/topics_frequency.rb
263
+ - lib/treat/extractors/named_entity/abner.rb
264
+ - lib/treat/extractors/named_entity/stanford.rb
265
+ - lib/treat/extractors/statistics/frequency.rb
266
+ - lib/treat/extractors/statistics/frequency_of.rb
267
+ - lib/treat/extractors/statistics/position_in.rb
268
+ - lib/treat/extractors/statistics/transition_matrix.rb
269
+ - lib/treat/extractors/statistics/transition_probability.rb
270
+ - lib/treat/extractors/time/chronic.rb
271
+ - lib/treat/extractors/time/native.rb
272
+ - lib/treat/extractors/time/nickel.rb
273
+ - lib/treat/extractors/topic_words/lda/data.dat
274
+ - lib/treat/extractors/topic_words/lda/wiki.yml
275
+ - lib/treat/extractors/topic_words/lda.rb
276
+ - lib/treat/extractors/topics/reuters/industry.xml
277
+ - lib/treat/extractors/topics/reuters/region.xml
278
+ - lib/treat/extractors/topics/reuters/topics.xml
279
+ - lib/treat/extractors/topics/reuters.rb
280
+ - lib/treat/extractors.rb
281
+ - lib/treat/feature.rb
282
+ - lib/treat/formatters/cleaners/html.rb
283
+ - lib/treat/formatters/readers/autoselect.rb
284
+ - lib/treat/formatters/readers/gocr.rb
285
+ - lib/treat/formatters/readers/html.rb
286
+ - lib/treat/formatters/readers/ocropus.rb
287
+ - lib/treat/formatters/readers/pdf.rb
288
+ - lib/treat/formatters/readers/txt.rb
289
+ - lib/treat/formatters/serializers/xml.rb
290
+ - lib/treat/formatters/serializers/yaml/helper.rb
291
+ - lib/treat/formatters/serializers/yaml.rb
292
+ - lib/treat/formatters/unserializers/autoselect.rb
293
+ - lib/treat/formatters/unserializers/xml.rb
294
+ - lib/treat/formatters/unserializers/yaml.rb
295
+ - lib/treat/formatters/visualizers/dot.rb
296
+ - lib/treat/formatters/visualizers/html.rb
297
+ - lib/treat/formatters/visualizers/inspect.rb
298
+ - lib/treat/formatters/visualizers/short_value.rb
299
+ - lib/treat/formatters/visualizers/standoff.rb
300
+ - lib/treat/formatters/visualizers/tree.rb
301
+ - lib/treat/formatters/visualizers/txt.rb
302
+ - lib/treat/formatters.rb
303
+ - lib/treat/group.rb
304
+ - lib/treat/inflectors/cardinal_words/linguistics.rb
305
+ - lib/treat/inflectors/conjugators/linguistics.rb
306
+ - lib/treat/inflectors/declensors/en.rb
307
+ - lib/treat/inflectors/declensors/linguistics.rb
308
+ - lib/treat/inflectors/lemmatizers/e_lemma/elemma.c
309
+ - lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb
310
+ - lib/treat/inflectors/lemmatizers/e_lemma/Makefile
311
+ - lib/treat/inflectors/lemmatizers/e_lemma.rb
312
+ - lib/treat/inflectors/ordinal_words/linguistics.rb
313
+ - lib/treat/inflectors/stemmers/porter.rb
314
+ - lib/treat/inflectors/stemmers/porter_c.rb
315
+ - lib/treat/inflectors/stemmers/uea.rb
316
+ - lib/treat/inflectors.rb
317
+ - lib/treat/lexicalizers/category/from_tag.rb
318
+ - lib/treat/lexicalizers/linkages/naive.rb
319
+ - lib/treat/lexicalizers/synsets/rita_wn.rb
320
+ - lib/treat/lexicalizers/synsets/wordnet.rb
321
+ - lib/treat/lexicalizers/tag/brill.rb
322
+ - lib/treat/lexicalizers/tag/lingua.rb
323
+ - lib/treat/lexicalizers/tag/stanford.rb
324
+ - lib/treat/lexicalizers.rb
325
+ - lib/treat/processors/chunkers/txt.rb
326
+ - lib/treat/processors/parsers/enju.rb
327
+ - lib/treat/processors/parsers/stanford.rb
328
+ - lib/treat/processors/segmenters/punkt.rb
329
+ - lib/treat/processors/segmenters/stanford.rb
330
+ - lib/treat/processors/segmenters/tactful.rb
331
+ - lib/treat/processors/tokenizers/macintyre.rb
332
+ - lib/treat/processors/tokenizers/multilingual.rb
333
+ - lib/treat/processors/tokenizers/perl.rb
334
+ - lib/treat/processors/tokenizers/punkt.rb
335
+ - lib/treat/processors/tokenizers/stanford.rb
336
+ - lib/treat/processors/tokenizers/tactful.rb
337
+ - lib/treat/processors.rb
338
+ - lib/treat/proxies.rb
339
+ - lib/treat/registrable.rb
340
+ - lib/treat/resources/categories.rb
341
+ - lib/treat/resources/delegates.rb
342
+ - lib/treat/resources/dependencies.rb
343
+ - lib/treat/resources/edges.rb
344
+ - lib/treat/resources/formats.rb
345
+ - lib/treat/resources/languages.rb
346
+ - lib/treat/resources/languages.txt
347
+ - lib/treat/resources/tags.rb
348
+ - lib/treat/resources.rb
349
+ - lib/treat/sugar.rb
350
+ - lib/treat/tree.rb
351
+ - lib/treat/utilities.rb
352
+ - lib/treat/visitable.rb
353
+ - lib/treat.rb
354
+ - test/profile.rb
355
+ - test/tc_detectors.rb
356
+ - test/tc_entity.rb
357
+ - test/tc_extractors.rb
358
+ - test/tc_formatters.rb
359
+ - test/tc_inflectors.rb
360
+ - test/tc_lexicalizers.rb
361
+ - test/tc_processors.rb
362
+ - test/tc_resources.rb
363
+ - test/tc_treat.rb
364
+ - test/tc_tree.rb
365
+ - test/tests.rb
366
+ - test/texts/english/long.html
367
+ - test/texts/english/long.txt
368
+ - test/texts/english/medium.txt
369
+ - test/texts/english/short.txt
370
+ - test/texts.rb
371
+ - examples/benchmark.rb
372
+ - examples/keywords.rb
373
+ - examples/texts/bugged_out.txt
374
+ - examples/texts/half_cocked_basel.txt
375
+ - examples/texts/hedge_funds.txt
376
+ - examples/texts/hose_and_dry.txt
377
+ - examples/texts/hungarys_troubles.txt
378
+ - examples/texts/indias_slowdown.txt
379
+ - examples/texts/merkozy_rides_again.txt
380
+ - examples/texts/prada_is_not_walmart.txt
381
+ - examples/texts/republican_nomination.txt
382
+ - examples/texts/to_infinity_and_beyond.txt
383
+ - bin/INFO
384
+ - README
385
+ - TODO
386
+ - LICENSE
387
+ - INSTALL
388
+ homepage: http://www.github.com/louismullie/nat
389
+ licenses: []
390
+ post_install_message:
391
+ rdoc_options: []
392
+ require_paths:
393
+ - lib
394
+ required_ruby_version: !ruby/object:Gem::Requirement
395
+ none: false
396
+ requirements:
397
+ - - ! '>='
398
+ - !ruby/object:Gem::Version
399
+ version: '0'
400
+ required_rubygems_version: !ruby/object:Gem::Requirement
401
+ none: false
402
+ requirements:
403
+ - - ! '>='
404
+ - !ruby/object:Gem::Version
405
+ version: '0'
406
+ requirements: []
407
+ rubyforge_project:
408
+ rubygems_version: 1.8.15
409
+ signing_key:
410
+ specification_version: 3
411
+ summary: Text retrieval, extraction and annotation toolkit
412
+ test_files: []