treat 0.2.5 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (242) hide show
  1. data/LICENSE +3 -3
  2. data/README.md +33 -0
  3. data/files/INFO +1 -0
  4. data/lib/treat.rb +40 -105
  5. data/lib/treat/ai.rb +12 -0
  6. data/lib/treat/ai/classifiers/id3.rb +27 -0
  7. data/lib/treat/categories.rb +82 -35
  8. data/lib/treat/categorizable.rb +44 -0
  9. data/lib/treat/classification.rb +61 -0
  10. data/lib/treat/configurable.rb +115 -0
  11. data/lib/treat/data_set.rb +42 -0
  12. data/lib/treat/dependencies.rb +24 -0
  13. data/lib/treat/downloader.rb +87 -0
  14. data/lib/treat/entities.rb +68 -66
  15. data/lib/treat/entities/abilities.rb +10 -0
  16. data/lib/treat/entities/abilities/buildable.rb +327 -0
  17. data/lib/treat/entities/abilities/checkable.rb +31 -0
  18. data/lib/treat/entities/abilities/copyable.rb +45 -0
  19. data/lib/treat/entities/abilities/countable.rb +51 -0
  20. data/lib/treat/entities/abilities/debuggable.rb +83 -0
  21. data/lib/treat/entities/abilities/delegatable.rb +123 -0
  22. data/lib/treat/entities/abilities/doable.rb +62 -0
  23. data/lib/treat/entities/abilities/exportable.rb +11 -0
  24. data/lib/treat/entities/abilities/iterable.rb +115 -0
  25. data/lib/treat/entities/abilities/magical.rb +83 -0
  26. data/lib/treat/entities/abilities/registrable.rb +74 -0
  27. data/lib/treat/entities/abilities/stringable.rb +91 -0
  28. data/lib/treat/entities/entities.rb +104 -0
  29. data/lib/treat/entities/entity.rb +122 -245
  30. data/lib/treat/exception.rb +4 -4
  31. data/lib/treat/extractors.rb +77 -80
  32. data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
  33. data/lib/treat/extractors/language/what_language.rb +50 -45
  34. data/lib/treat/extractors/name_tag/stanford.rb +55 -0
  35. data/lib/treat/extractors/tf_idf/native.rb +87 -0
  36. data/lib/treat/extractors/time/chronic.rb +55 -0
  37. data/lib/treat/extractors/time/nickel.rb +86 -62
  38. data/lib/treat/extractors/time/ruby.rb +53 -0
  39. data/lib/treat/extractors/topic_words/lda.rb +67 -58
  40. data/lib/treat/extractors/topics/reuters.rb +100 -87
  41. data/lib/treat/formatters.rb +39 -35
  42. data/lib/treat/formatters/readers/abw.rb +49 -29
  43. data/lib/treat/formatters/readers/autoselect.rb +37 -33
  44. data/lib/treat/formatters/readers/doc.rb +19 -13
  45. data/lib/treat/formatters/readers/html.rb +52 -30
  46. data/lib/treat/formatters/readers/image.rb +41 -40
  47. data/lib/treat/formatters/readers/odt.rb +59 -45
  48. data/lib/treat/formatters/readers/pdf.rb +28 -25
  49. data/lib/treat/formatters/readers/txt.rb +12 -15
  50. data/lib/treat/formatters/readers/xml.rb +73 -36
  51. data/lib/treat/formatters/serializers/xml.rb +80 -79
  52. data/lib/treat/formatters/serializers/yaml.rb +19 -18
  53. data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
  54. data/lib/treat/formatters/unserializers/xml.rb +94 -99
  55. data/lib/treat/formatters/unserializers/yaml.rb +20 -19
  56. data/lib/treat/formatters/visualizers/dot.rb +132 -132
  57. data/lib/treat/formatters/visualizers/standoff.rb +52 -44
  58. data/lib/treat/formatters/visualizers/tree.rb +26 -29
  59. data/lib/treat/groupable.rb +153 -0
  60. data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
  61. data/lib/treat/inflectors.rb +50 -45
  62. data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
  63. data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
  64. data/lib/treat/inflectors/declensors/active_support.rb +31 -0
  65. data/lib/treat/inflectors/declensors/english.rb +38 -0
  66. data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
  67. data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
  68. data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
  69. data/lib/treat/inflectors/stemmers/porter.rb +160 -0
  70. data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
  71. data/lib/treat/inflectors/stemmers/uea.rb +28 -0
  72. data/lib/treat/installer.rb +308 -0
  73. data/lib/treat/kernel.rb +105 -27
  74. data/lib/treat/languages.rb +122 -88
  75. data/lib/treat/languages/arabic.rb +15 -15
  76. data/lib/treat/languages/chinese.rb +15 -15
  77. data/lib/treat/languages/dutch.rb +15 -15
  78. data/lib/treat/languages/english.rb +61 -62
  79. data/lib/treat/languages/french.rb +19 -19
  80. data/lib/treat/languages/german.rb +20 -20
  81. data/lib/treat/languages/greek.rb +15 -15
  82. data/lib/treat/languages/italian.rb +16 -16
  83. data/lib/treat/languages/polish.rb +15 -15
  84. data/lib/treat/languages/portuguese.rb +15 -15
  85. data/lib/treat/languages/russian.rb +15 -15
  86. data/lib/treat/languages/spanish.rb +16 -16
  87. data/lib/treat/languages/swedish.rb +16 -16
  88. data/lib/treat/lexicalizers.rb +34 -55
  89. data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
  90. data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
  91. data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
  92. data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
  93. data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
  94. data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
  95. data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
  96. data/lib/treat/linguistics.rb +9 -0
  97. data/lib/treat/linguistics/categories.rb +11 -0
  98. data/lib/treat/linguistics/tags.rb +422 -0
  99. data/lib/treat/loaders/linguistics.rb +30 -0
  100. data/lib/treat/loaders/stanford.rb +27 -0
  101. data/lib/treat/object.rb +1 -0
  102. data/lib/treat/processors.rb +37 -44
  103. data/lib/treat/processors/chunkers/autoselect.rb +16 -0
  104. data/lib/treat/processors/chunkers/html.rb +71 -0
  105. data/lib/treat/processors/chunkers/txt.rb +18 -24
  106. data/lib/treat/processors/parsers/enju.rb +253 -208
  107. data/lib/treat/processors/parsers/stanford.rb +130 -131
  108. data/lib/treat/processors/segmenters/punkt.rb +79 -45
  109. data/lib/treat/processors/segmenters/stanford.rb +46 -48
  110. data/lib/treat/processors/segmenters/tactful.rb +43 -36
  111. data/lib/treat/processors/tokenizers/perl.rb +124 -92
  112. data/lib/treat/processors/tokenizers/ptb.rb +81 -0
  113. data/lib/treat/processors/tokenizers/punkt.rb +48 -42
  114. data/lib/treat/processors/tokenizers/stanford.rb +39 -38
  115. data/lib/treat/processors/tokenizers/tactful.rb +64 -55
  116. data/lib/treat/proxies.rb +52 -35
  117. data/lib/treat/retrievers.rb +26 -16
  118. data/lib/treat/retrievers/indexers/ferret.rb +47 -26
  119. data/lib/treat/retrievers/searchers/ferret.rb +69 -50
  120. data/lib/treat/tree.rb +241 -183
  121. data/spec/collection.rb +123 -0
  122. data/spec/document.rb +93 -0
  123. data/spec/entity.rb +408 -0
  124. data/spec/languages.rb +25 -0
  125. data/spec/phrase.rb +146 -0
  126. data/spec/samples/mathematicians/archimedes.abw +34 -0
  127. data/spec/samples/mathematicians/euler.html +21 -0
  128. data/spec/samples/mathematicians/gauss.pdf +0 -0
  129. data/spec/samples/mathematicians/leibniz.txt +13 -0
  130. data/spec/samples/mathematicians/newton.doc +0 -0
  131. data/spec/sandbox.rb +5 -0
  132. data/spec/token.rb +109 -0
  133. data/spec/treat.rb +52 -0
  134. data/spec/tree.rb +117 -0
  135. data/spec/word.rb +110 -0
  136. data/spec/zone.rb +66 -0
  137. data/tmp/INFO +1 -1
  138. metadata +100 -201
  139. data/INSTALL +0 -1
  140. data/README +0 -3
  141. data/TODO +0 -28
  142. data/lib/economist/half_cocked_basel.txt +0 -16
  143. data/lib/economist/hungarys_troubles.txt +0 -46
  144. data/lib/economist/indias_slowdown.txt +0 -15
  145. data/lib/economist/merkozy_rides_again.txt +0 -24
  146. data/lib/economist/prada_is_not_walmart.txt +0 -9
  147. data/lib/economist/to_infinity_and_beyond.txt +0 -15
  148. data/lib/ferret/_11.cfs +0 -0
  149. data/lib/ferret/_14.cfs +0 -0
  150. data/lib/ferret/_p.cfs +0 -0
  151. data/lib/ferret/_s.cfs +0 -0
  152. data/lib/ferret/_v.cfs +0 -0
  153. data/lib/ferret/_y.cfs +0 -0
  154. data/lib/ferret/segments +0 -0
  155. data/lib/ferret/segments_15 +0 -0
  156. data/lib/treat/buildable.rb +0 -157
  157. data/lib/treat/category.rb +0 -33
  158. data/lib/treat/delegatable.rb +0 -116
  159. data/lib/treat/doable.rb +0 -45
  160. data/lib/treat/entities/collection.rb +0 -14
  161. data/lib/treat/entities/document.rb +0 -12
  162. data/lib/treat/entities/phrases.rb +0 -17
  163. data/lib/treat/entities/tokens.rb +0 -61
  164. data/lib/treat/entities/zones.rb +0 -41
  165. data/lib/treat/extractors/coreferences/stanford.rb +0 -69
  166. data/lib/treat/extractors/date/chronic.rb +0 -32
  167. data/lib/treat/extractors/date/ruby.rb +0 -25
  168. data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
  169. data/lib/treat/extractors/language/language_extractor.rb +0 -27
  170. data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
  171. data/lib/treat/extractors/roles/naive.rb +0 -73
  172. data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
  173. data/lib/treat/extractors/statistics/position_in.rb +0 -14
  174. data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
  175. data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
  176. data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
  177. data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
  178. data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
  179. data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
  180. data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
  181. data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
  182. data/lib/treat/feature.rb +0 -58
  183. data/lib/treat/features.rb +0 -7
  184. data/lib/treat/formatters/visualizers/short_value.rb +0 -29
  185. data/lib/treat/formatters/visualizers/txt.rb +0 -45
  186. data/lib/treat/group.rb +0 -106
  187. data/lib/treat/helpers/linguistics_loader.rb +0 -18
  188. data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
  189. data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
  190. data/lib/treat/inflectors/declensions/english.rb +0 -319
  191. data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
  192. data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
  193. data/lib/treat/inflectors/stem/porter.rb +0 -162
  194. data/lib/treat/inflectors/stem/porter_c.rb +0 -26
  195. data/lib/treat/inflectors/stem/uea.rb +0 -30
  196. data/lib/treat/install.rb +0 -59
  197. data/lib/treat/languages/tags.rb +0 -377
  198. data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
  199. data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
  200. data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
  201. data/lib/treat/lexicalizers/tag/brill.rb +0 -91
  202. data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
  203. data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
  204. data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
  205. data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
  206. data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
  207. data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
  208. data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
  209. data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
  210. data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
  211. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
  212. data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
  213. data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
  214. data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
  215. data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
  216. data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
  217. data/lib/treat/registrable.rb +0 -28
  218. data/lib/treat/sugar.rb +0 -50
  219. data/lib/treat/viewable.rb +0 -29
  220. data/lib/treat/visitable.rb +0 -28
  221. data/test/profile.rb +0 -2
  222. data/test/tc_entity.rb +0 -117
  223. data/test/tc_extractors.rb +0 -73
  224. data/test/tc_formatters.rb +0 -41
  225. data/test/tc_inflectors.rb +0 -34
  226. data/test/tc_lexicalizers.rb +0 -32
  227. data/test/tc_processors.rb +0 -50
  228. data/test/tc_resources.rb +0 -22
  229. data/test/tc_treat.rb +0 -60
  230. data/test/tc_tree.rb +0 -60
  231. data/test/tests.rb +0 -20
  232. data/test/texts.rb +0 -19
  233. data/test/texts/english/half_cocked_basel.txt +0 -16
  234. data/test/texts/english/hose_and_dry.doc +0 -0
  235. data/test/texts/english/hungarys_troubles.abw +0 -70
  236. data/test/texts/english/long.html +0 -24
  237. data/test/texts/english/long.txt +0 -22
  238. data/test/texts/english/medium.txt +0 -5
  239. data/test/texts/english/republican_nomination.pdf +0 -0
  240. data/test/texts/english/saving_the_euro.odt +0 -0
  241. data/test/texts/english/short.txt +0 -3
  242. data/test/texts/english/zero_sum.html +0 -111
data/INSTALL DELETED
@@ -1 +0,0 @@
1
- See the Wiki: https://github.com/louismullie/treat/wiki/Installing-Treat
data/README DELETED
@@ -1,3 +0,0 @@
1
- Treat - Text Retrieval and Annotation Toolkit
2
-
3
- See the wiki for more information at https://github.com/louismullie/treat/wiki/.
data/TODO DELETED
@@ -1,28 +0,0 @@
1
- ## Todo
2
-
3
- - Chronic, ruby date
4
- - YAML in folder
5
- - Tag remove sentence
6
- - Stanford/Enju phrase in phrase
7
- - Date, time, roles structs
8
- - test wiki
9
- - Fix tag set for Stanford Parser.
10
- - Make sure that Core NLP reloads appropriately.
11
- - XML unserializer
12
- - Tag opt
13
- - Stanford dependencies parse
14
- - Annotator dependencies
15
- - Meronyms
16
- - Add XML stripper from the Stanford tools
17
- - Try new encodings
18
- - Add Stanford and Wordnet lemmatizers
19
- - Improve linkers
20
- - Sentiment analysis
21
- - Enju as a server
22
- - More information with categories
23
- - Gist languages convert
24
- -
25
-
26
- ## Personal
27
-
28
- - Check treat.rb, tests.rb, kernel.rb.
@@ -1,16 +0,0 @@
1
- Half-cocked Basel
2
- Stop-gap rules on banks’ trading books may add perilous complexity
3
-
4
- THE NEW-YEAR hangover throbbed agonisingly for investment bankers this year. Blame Basel 2.5, a new set of international rules which charges banks higher capital for the risks they run in their trading books (as opposed to their banking books, where they keep assets that they intend to hold to maturity). Those charges were too low before. And heaping higher costs on banks should please politicians and Joe Public. But they add another layer of complexity to banks’ risk management.
5
-
6
- Basel 2.5 came into force on December 31st in most European and major world financial jurisdictions. Switzerland applied the rules a year early, and the costs are substantial. Third-quarter figures for Credit Suisse show a 28% increase in risk-weighted assets, and hence capital charges, for its investment-banking activities purely because of Basel 2.5.
7
-
8
- The most notable laggard is America. US financial regulators do not oppose Basel 2.5, but it clashes with the Dodd-Frank act, America’s big wet blanket of a financial reform. Basel 2.5 uses credit ratings from recognised agencies such as Moody’s and Standard & Poor’s to calibrate capital charges. Dodd-Frank expressly forbids the use of such ratings agencies, whose poor judgments are held partly responsible for the crisis. Instead American regulators are working on their own cocktail of credit-risk calibrations for Basel 2.5, using market data and country-risk ratings from the OECD. Their solution is still months away from application (though not as distant as implementation by the Russians or Argentines).
9
-
10
- Basel 2.5 for the first time charges banks extra capital for the credit risk of what they hold in their trading portfolio (because the crisis showed that markets are not always liquid enough to be able to offload assets). That includes a charge for the risk that a counterparty goes bust. It also imposes heavy charges on securitised bundles of assets unless the credit risk of each piece of the bundle has an identifiable market price. Banks that have portfolios of trading positions which they reckon offset each other have to convince regulators that their risk models work or face being charged at a cruder, standardised rate.
11
-
12
- The problem with Basel 2.5, recognised by regulators and bankers alike, is its complexity. The risk of a trading portfolio must now be broken down into five “buckets”—value at risk (VaR), a measure of how much could be lost in an average trading day; stressed VaR (how much could be lost in extreme conditions); plus three types of credit risk ranging from the risk of single credits to those of securitised loans. Traders are understandably confused. For some banks, developing risk models and getting them approved is just too expensive: more complex businesses will be shut down. That will please those who want banks to be more boring.
13
-
14
- But unintended consequences will doubtless follow. Useful products may become less tradable. Trading of riskier products could migrate to unregulated entities. Banks may be tempted into new forms of regulatory arbitrage, by juggling assets between their trading book and their banking book. Worst of all, perhaps, is the increased risk of back-office bungling because of the extra complexity.
15
-
16
- Regulators recognise this risk. The Basel Committee on Banking Supervision, which drew up the rules and is also responsible for the full Basel 3 regime that will come into force in 2019, is still conducting what it calls a “fundamental review” of capital rules for banks’ trading books. Publication is not expected before March. Those sore heads will not soon clear.
@@ -1,46 +0,0 @@
1
- Hungary's troubles
2
- Not just a rap on the knuckles
3
-
4
- THE pressure is piling up on the beleaguered Hungarian government. Today the European Commission threatened it with legal action over several new "cardinal" laws that would require a two-thirds majority in parliament to overturn.
5
-
6
- The commission is still considering the laws, but today it highlighted concerns over three issues:
7
-
8
- - The independence of the central bank. Late last year the Hungarian parliament passed a law which expands the monetary council and takes the power to nominate deputies away from the governor and hands it to the prime minister. A separate law opens the door to a merger between the bank and the financial regulator.
9
-
10
- - The judiciary. More than 200 judges over the age of 62 have been forced into retirement and hundreds more face the sack. The new National Judicial Authority is headed by Tünde Handó, a friend of the family of Viktor Orban, the prime minister.
11
-
12
- - The independence of the national data authority.
13
-
14
- That wasn't all the commission had to say today. Hungary also received a ticking-off from Olli Rehn (pictured), the economic-affairs commissioner, for not doing enough to tackle its budget deficit. It may now lose access to EU funds.
15
-
16
- Slammed in Brussels, the Hungarian government is also under pressure at home. Earlier this week Gordon Bajnai, who served as Socialist prime minister from 2009-10, fired off a broadside that sent shockwaves through the political and media establishments.
17
-
18
- After a year and a half of government by the right-wing Fidesz party, wrote Mr Bajnai in a lengthy article on the website of the Patriotism and Progress Public Policy Foundation, democracy has been destroyed in Hungary. The country, he warned, is scarred by division and is drifting towards bankruptcy and away from Europe.
19
-
20
- Mr Bajnai called for a radical change of government and a complete political re-orientation. “A new government must have a programme readily at hand that can be applied without delay: a programme that promotes the republic, reconciliation, and recovery.”
21
-
22
- Fidesz is rattled by Mr Bajnai, who since leaving office has been teaching at Columbia University in New York. Understandably so. He headed a technocratic administration which stabilised the economy. Unlike his Socialist predecessor, Ferenc Gyurcsany, he was neither part of the old Communist elite nor connected to it by marriage, and so cannot be smeared as a "Komcsi". He is modern in outlook and well regarded internationally.
23
-
24
- Moreover, say those who know him, Mr Bajnai has little patience for the narcissistic exceptionalism that shapes Fidesz’s worldview. Exhibit A: the plaintive cry of Janos Martonyi, the foreign minister, who lamented recently: “The world will never understand our pains and spiritual wounds.” Such self-pity is unlikely to endear the Hungarian government to Brussels or Washington DC (to where it has sent an envoy this week to negotiate with the IMF).
25
-
26
- Fidesz won a two-thirds majority in 2010. But its support is evaporating, and analysts say there is a gap in the political market for a centrist pro-business party committed to democratic norms. Mr Bajnai, who has not ruled out a return to politics, would be an obvious candidate to lead it.
27
-
28
- Meanwhile, as Hungarians watch the value of their assets vaporise, in large part thanks to the government’s increasingly erratic policies, Mr Orban smirks his way through press conferences. Here he is dodging questions from a reporter from HVG, an economics weekly, about his responsibility for the crisis and trying to shift the blame to his old enemy Andras Simor, president of the central bank. The interview ran as follows:
29
-
30
- hvg.hu: Do you feel responsible for the falling/weakening forint?
31
-
32
- Mr Orban: You mean the president of the central bank? He did not comment on it.
33
-
34
- hvg.hu: No, you, Mr prime minister!
35
-
36
- Mr Orban: The personal responsibility of the president of the central bank was not discussed over the meeting.
37
-
38
- hvg.hu: You, your personal…!
39
-
40
- Mr Orban: That neither.
41
-
42
- Surrounded by yes-men and grinning flunkies, Mr Orban seems increasingly out of touch. His future will likely be decided not in the gilded corridors of the Hungarian parliament, but in Brussels and Washington DC.
43
-
44
- What happens next? If his hand is forced Mr Orban can probably endure policy reversals on the independence of the central bank and the data ombudsman. Sorry, he would say to his loyal followers: national crisis, what can you do.
45
-
46
- The dismantling of the judiciary would be another matter. If outsiders keep up the pressure and the judicial changes are judged to be in breach of the EU treaty, Mr Orban would be in a tricky spot. It’s hard to see how he could declare the 200-plus judges his government has forced into retirement ready for office after all, and still sit in his own.
@@ -1,15 +0,0 @@
1
- India’s slowdown
2
- The case for the defence
3
- Why officials think investors are too bearish about India’s economy
4
-
5
- THE SMOG is so bad in Delhi right now that it seeps indoors. In one government building the far end of the corridor seems hazy. But the view of the mandarin working there is clear: India’s economic miracle is not over, regardless of the chatter among investors and howls about government paralysis from industrialists. He pokes fun at the latter. A year ago they were swanning around Davos proclaiming India could grow in its sleep, he says. Now, with growth dipping to 6.9% last quarter, from a peak of 10% (see chart), they are pleading for government action.
6
-
7
- Bears in Mumbai, India’s financial capital, worry that GDP growth might slip below 6% as confidence and investment slip. That partly reflects global woes, and partly too the gumming up of the bureaucracy due to a wave of graft allegations. But it is also because no big reforms have taken place for years; and such is the dire state of India’s politics that it is hard to imagine any being imminent. Things reached a nadir at the end of last year when the ruling coalition announced it would allow foreign supermarkets into the country, only to do a U-turn in the face of protests from the opposition and its own coalition partners. Shortly afterwards it failed to carry a key anti-corruption bill through parliament.
8
-
9
- The government is not blind to these concerns—in his new-year address the prime minister, Manmohan Singh, conceded that “it would be wrong to conclude that India is now unshakeably set on a process of rapid growth.” But officials in Delhi are more optimistic than the financial markets, for three reasons. First, they argue that growth is bottoming out. Inflation is showing signs of falling, which should allow the central bank to reverse its long series of interest-rate hikes. The recent drop in the rupee is a healthy adjustment, not cause for panic, they say. Meanwhile the euro zone is vaguely getting its act together and there are hints of a recovery in America. Growth, it is thought, will be about 7% for the fiscal year ending in March, respectable enough, and will pick up from there.
10
-
11
- Second, the long-term drivers of India’s boom are intact. There is “not much reason to change your mind,” says the mandarin. The rise in the savings rate, which allows more investment, will continue, partly thanks to a demographic bulge of people reaching working age. Even if there is a drop in capital expenditure, it should remain above 30% of GDP—a “handsome level”, says another official, that will boost the country’s potential. The government’s 12th five-year plan, which is due out soon, was originally expected to forecast growth of 9% between 2012 and 2017. That might fall to 8.5%, officials say, but no further.
12
-
13
- The final strut of the argument is that the politics are not as bad as they seem. Pessimists, the mandarin says, reckon “we’re just going to fiddle around and miss our opportunities.” But after important elections in February in Uttar Pradesh, the most populous state, the politicians may stop posturing and even co-operate to pass less contentious reforms such as a new national value-added tax. That would cut red tape and the fiscal deficit. And even if parliament stays gridlocked, there are lots of nuts-and-bolts reforms that do not require legislation. The government will try harder to tackle the bottlenecks that choke the power industry, for instance, and the paperwork that is snarling up big projects.
14
-
15
- The nub of the official argument is “calm down—and trust us to do just enough.” The trouble is the government has been saying this for a year, and business folk and investors seem to have lost heart. Firms have cut investment and the stockmarket was one of the world’s worst performers in dollar terms last year. Perhaps they are being too jumpy, but India does not have the luxury of dismissing what firms and investors think. The fiscal deficit, including the states and off-balance-sheet items, is running at 9-10% of GDP for the fourth year in a row. The current-account deficit is drifting towards 4% of GDP, officials admit, well above the country’s traditional comfort zone. India needs to command the confidence of domestic and foreign investors. Unless the reform process starts moving there is a risk that the financing of these deficits will become an acute problem—and that India’s economic miracle recedes further into the Delhi haze.
@@ -1,24 +0,0 @@
1
- Saving the euro, part 473
2
- Merkozy rides again
3
-
4
- ANGELA MERKEL and Nicolas Sarkozy kicked off the 2012 season of the euro soap opera with a summit meeting in Berlin today. Neither said anything startling; certainly nothing that would betoken a swift and happy conclusion to the long-running saga.
5
-
6
- The German chancellor and the French president muted their differences over such issues as how quickly to introduce a tax on financial transactions and what the role of the European Central Bank (ECB) should be in supporting shaky members of the euro zone. “Our analysis is the same,” said Mr Sarkozy at the post-summit press conference.
7
-
8
- This did not calm markets’ nerves. The euro dropped to its lowest level against the dollar since September 2010 ($1.266) before the summit and recovered marginally as the two leaders met. Currency traders’ biggest worry is Greece’s failure to meet its fiscal targets, which means it may not get the fresh money it needs to avoid defaulting on its debt.
9
-
10
- At the opposite end of the confidence spectrum, investors are so eager to finance Germany that they accepted a negative interest rate on an auction of six-month paper, in effect paying Germany’s government for the privilege of lending to it. Germans will see this as vindication of their prudent policies, but it also serves to underline the dangerous economic divergences within the euro zone.
11
-
12
- The main significance of the Merkozy summit is that it seemed to signal a shift in emphasis. True, the austerity agenda—promoted by the Germans and grudgingly accepted by the French—is still there. Indeed, Mr Sarkozy boasted that France’s fiscal deficit was smaller than expected in 2011. Europe is making swift progress towards a “fiscal pact” to limit deficits, proclaimed Mrs Merkel, including German-style “debt brakes”. A new treaty should be signed by March.
13
-
14
- But fiscal self-denial will now be supplemented by what Mrs Merkel called a “second leg”, meaning economic growth and job creation. This is partly meant to help Mr Sarkozy, who faces a tough re-election fight this spring.
15
-
16
- All euro-zone countries, including Germany, are “prepared to do their homework” in this area, the chancellor promised, but it is not clear that much new is on offer. A big German stimulus package to boost growth in neighbouring countries is not in prospect (that would nobble the fiscal leg).
17
-
18
- Mrs Merkel spoke of spreading best practice in labour-market regulation across the euro zone (which is German practice, Mr Sarkozy admits) and spending existing European funds more quickly and effectively. Both ideas make sense; neither will prevent further financial turmoil, or a European recession. In the latest sign of fragility, German industrial production dropped 1% in November.
19
-
20
- The leaders tried to seem anything but complacent. Mr Sarkozy called the situation “very tense” and Mrs Merkel said they had “understood the needs of the hour.” The intention is to keep Greece from dropping out of the euro zone, but whatever happens Greece is an exceptional case, the leaders said (perhaps fearing that a Greek default or even an exit from the euro could not be avoided). As always, the chancellor dampened expectations of a quick “one-dimensional” solution to the crisis. The problem would be solved, she said, “step by step.”
21
-
22
- The next steps involve Italy, an indebted giant that poses a far greater threat to the euro than Greece. Mrs Merkel will meet Italy’s unelected prime minister, Mario Monti, in Berlin on Wednesday; she and Mr Sarkozy will hold a three-way summit with him in Rome on January 20th. European heads of government are to gather, probably on January 30th, to put the finishing touches to the fiscal pact.
23
-
24
- Also on the agenda, no doubt, will be a proposed financial-transactions tax. Britain is threatening a veto; Mr Sarkozy has said France will go it alone at first, if need be. Mrs Merkel wants the tax but her junior coalition partner, the Free Democrats, do not unless the British get on board. As the crisis sharpens, disagreements are likely to re-emerge over the role of the ECB and how to strengthen the euro zone's bail-out funds. The soap opera has a long way to run.
@@ -1,9 +0,0 @@
1
- Prada is not Walmart
2
-
3
- INDIA, if you believe the government, will be a land in which Starbucks and Prada thrive but where foreign firms will be prohibited from selling onions. It does not seem like much of a cause for celebration, but the announcement on January 11th that foreign “single brand” retailers could own 100% of their operations in India was meant to show the reform process was on track. It followed a debacle late last year when the government first announced that not only would single brand retailing be opened up, but foreign supermarkets would be allowed to operate in India too—and then was quickly forced into a U-turn on the latter promise after facing a rebellion within its own ranks and from the coalition parties it relies on in parliament.
4
-
5
- By emphasising that at least the single brand bit of retail reform is still on track, the government hopes to show the world that India is still open for business. But this is a meek change indeed. Single brand retailers, such as fashion chains, were already allowed to own 51% of their operations. And the political stink of last month is likely to scare those who are not already present because swathes of the political class have been shown to be populist and hostile to foreign firms. Individual states may still choose to override the central government’s rules. Lastly, the reform comes with a large catch: 30% of what is sold must be supplied from cottage industries in India. If you are selling a uniform product worldwide—a sofa or handbag made in China—that is a major hassle.
6
-
7
- The hope must be that India is on a journey to the right place, stumbling along the way. Perhaps the supplier rule will eventually be dropped, the argument goes. Maybe reluctant states will learn the error of their ways and open up too, after seeing the success of single brand retailers in other states. And maybe, after seeing an influx of investment from single brand retailers, the political climate will change and it will be easier to pass a reform that lets supermarkets in too.
8
-
9
- Interviewed in Delhi earlier in January, a government mandarin insisted that the supermarket reform was not dead. Yet all of this seems half-hearted. India is a hard enough place as it is for foreign firms to make profits. Adding in a fickle polity just makes things worse. And it is a rather sorry day for progress when a rule tweak to allow Starbucks or Prada to own not 51%, but 100%, of their shops is presented as a meaningful economic reform.
@@ -1,15 +0,0 @@
1
- To Infinity and Beyond
2
-
3
- THE annual Consumer Electronics Show (CES) feels right at home amid the blazing neon excess of Las Vegas. For four days each January, thousands of companies gather to showcase their flashiest technologies at America's biggest trade show. This year over 20,000 brand new gadgets competed for attendees' attention. Each has its own power cord or battery, and almost every one is either bigger or faster—and thus more power-hungry—than last year's model.
4
-
5
- The International Energy Agency expects consumer electronics' appetite for electricity to double over the next decade, from 15% to 30% of residential consumption worldwide. Even supposedly energy-saving innovations, such as the organic-LED (OLED) screen in the massive 55-inch television proudly displayed by South Korea's LG, consume oodles of power, just by dint of being so big.
6
-
7
- But this year's show also sees a welcome counter-trend. Several companies launched products labelled as having "infinite power". Such devices are meant to generate at least as much power as they consume. Buy one of these gizmos, the theory goes, and you need never connect it to a wall socket.
8
-
9
- Eton Corporation announced Rukus, a portable boom box that can stream music from Bluetooth devices such as smartphones and tablets. For every hour that the Rukus is in full sunlight, it can play an hour of music, harvesting solar energy from 40 square inches (260 square centimetres) of photovoltaic panels. It has an internal battery which stores sunshine for cloudy picnics and can also be tapped to recharge other mobile gadgets.
10
-
11
- Similarly impressive is a high-tech cover for Amazon's Kindle ebook reader by Solar Focus (see picture). A solar panel on the outer face gives 90 minutes of reading time for every hour of sunlight. Surplus energy is stored in a small lithium-ion battery and allows an LED reading lamp to run for up to 50 hours without drawing on the Kindle's own battery.
12
-
13
- Both the Kindle and the Rukus have frugal monochrome E Ink displays that consume a fraction of the power of the colourful LCD screens found on most gadgets. Even the best solar-panel case for Apple's iPhone, for instance, can do no more than slow the rate at which the smartphone runs down. You might think, then, that something as large as a practical solar-powered electric car would be utterly impossible. Not so, says Ford. At CES, the carmaker showed a domestic solar panel kit it claims will offset all the electricity used over the lifetime of its new Focus Electric plug-in vehicle. The 150 square-foot (14 square-metre) array, to be installed on owners' houses, should feed as much power to the grid as the car draws to recharge its battery—enough to account for every mile a typical motorist drives. Panels on roofs in rainy Seattle might allow 12,000 miles (19,300km) of driving each year; denizens of sunny Tucson may squeeze out 15,000 miles or more.
14
-
15
- The solar kit will set a Ford owner back around $10,000 (the price would be higher were it not for American federal incentives). It comes with a 25-year guarantee—22 years more than the warranty on the car. This is still some way from the dream of a self-contained solar runabout, but it is a start. The increasingly power-hungry electronics industry will no doubt need to steer itself in a similar direction.
data/lib/ferret/_11.cfs DELETED
Binary file
data/lib/ferret/_14.cfs DELETED
Binary file
data/lib/ferret/_p.cfs DELETED
Binary file
data/lib/ferret/_s.cfs DELETED
Binary file
data/lib/ferret/_v.cfs DELETED
Binary file
data/lib/ferret/_y.cfs DELETED
Binary file
data/lib/ferret/segments DELETED
Binary file
Binary file
@@ -1,157 +0,0 @@
1
- module Treat
2
- # Represents an object that can be built
3
- # from a folder of files, a specific file,
4
- # a string or a numeric object. This class
5
- # is pretty much self-explanatory.
6
- module Buildable
7
- # Initialize the document with its filename.
8
- # Optionally specify a reader to read the file.
9
- # If +read+ is set to false, the document will
10
- # not be read automatically; in that case, the
11
- # method #read must be called on the document
12
- # object to load it in.
13
- def build(file_or_value = '', id = nil)
14
- from_anything(file_or_value, id)
15
- end
16
- def from_anything(file_or_value, id)
17
- if File.readable?(file_or_value.to_s)
18
- from_file(file_or_value)
19
- elsif file_or_value.is_a?(String)
20
- from_string(file_or_value)
21
- elsif file_or_value.is_a?(Numeric)
22
- from_numeric(file_or_value)
23
- else
24
- raise Treat::Exception,
25
- "Unrecognizable input #{file_or_value}. "+
26
- "Use filename, folder, text or a number."
27
- end
28
- end
29
- def from_string(string, enforce_type = false)
30
- enforce_type = true if caller_method == :build
31
- if self == Treat::Entities::Document ||
32
- self == Treat::Entities::Collection
33
- raise Treat::Exception,
34
- "Cannot create a document or collection from " +
35
- "a string (need a readable file/folder)."
36
- end
37
- unless self == Treat::Entities::Entity
38
- return self.new(string) if enforce_type
39
- end
40
- dot = string.count('.!?')
41
- if self == Treat::Entities::Phrase
42
- if dot >= 1
43
- c = Treat::Entities::Sentence.new(string)
44
- else
45
- c = Treat::Entities::Phrase.new(string)
46
- end
47
- elsif (self == Treat::Entities::Token) ||
48
- string.count(' ') == 0
49
- if string == "'s"
50
- c = Treat::Entities::Clitic.new(string)
51
- elsif string =~ /^[[:alpha:]\-']+$/ &&
52
- string.count(' ') == 0
53
- c = Treat::Entities::Word.new(string)
54
- elsif string =~ /^[[:digit:]]+$/
55
- c = Treat::Entities::Number.new(string)
56
- elsif string =~ /^[[:punct:]]+$/
57
- c = Treat::Entities::Punctuation.new(string)
58
- else
59
- c = Treat::Entities::Symbol.new(string)
60
- end
61
- elsif dot > 1 || string.count("\n") > 0
62
- c = Treat::Entities::Section.new(string)
63
- elsif dot >= 1 && dot < 5 && string.size > 5
64
- c = Treat::Entities::Sentence.new(string)
65
- elsif string.strip.count(' ') > 0
66
- c = Treat::Entities::Phrase.new(string)
67
- else
68
- c = Treat::Entities::Unknown.new(string) unless c
69
- end
70
- unless self == c.class || self == Treat::Entities::Entity || c.is_a?(self)
71
- raise "You said that \"#{string}\" was a #{cl(self).downcase}, " +
72
- "but Treat thinks it is a #{cl(c.class).downcase}."
73
- end
74
- c
75
- end
76
- def from_numeric(numeric)
77
- unless self == Treat::Entities::Number
78
- raise Treat::Exception,
79
- "Cannot create something else than a " +
80
- " number from a numeric object."
81
- end
82
- Treat::Entities::Number.new(numeric.to_s)
83
- end
84
- def from_folder(folder, exclude = ['cfs'])
85
- unless FileTest.directory?(folder)
86
- raise Treat::Exception,
87
- "Path '#{folder}' does not point to a folder."
88
- end
89
- unless File.readable?(folder)
90
- raise Treat::Exception,
91
- "Folder '#{folder}' is not readable."
92
- end
93
- unless self == Treat::Entities::Collection
94
- raise Treat::Exception,
95
- "Cannot create something else than a " +
96
- "collection from folder '#{folder}'."
97
- end
98
- c = Treat::Entities::Collection.new(folder)
99
- folder += '/' unless folder[-1] == '/'
100
- Dir[folder + '*'].each do |f|
101
- if FileTest.directory?(f)
102
- c2 = Treat::Entities::Collection.from_folder(f)
103
- c << c2
104
- else
105
- c << Treat::Entities::Document.from_file(f)
106
- end
107
- end
108
- c
109
- end
110
- def from_file(file)
111
- unless File.readable?(file)
112
- raise Treat::Exception,
113
- "Path '#{file}' does not point to a readable file."
114
- end
115
- if FileTest.directory?(file)
116
- from_folder(file)
117
- else
118
- ext = file.split('.')[-1]
119
- # Humanize the yaml extension.
120
- ext = 'yaml' if ext == 'yml'
121
- if ext == 'yaml'
122
- from_serialized_file(file)
123
- elsif ext == 'xml'
124
- beginning = nil
125
- File.open(file) do |w|
126
- beginning = w.readlines(200)
127
- end
128
- beginning = beginning.join(' ')
129
- if beginning.index('<treat>')
130
- from_serialized_file(file)
131
- else
132
- from_raw_file(file)
133
- end
134
- elsif ext == 'cfs'
135
- return
136
- else
137
- from_raw_file(file)
138
- end
139
- end
140
- end
141
- def from_raw_file(file)
142
- unless self == Treat::Entities::Document
143
- raise Treat::Exception,
144
- "Cannot create something else than a " +
145
- "document from raw file '#{file}'."
146
- end
147
- d = Treat::Entities::Document.new(file)
148
- d.read
149
- end
150
- def from_serialized_file(file)
151
- d = Treat::Entities::Document.new(file)
152
- d.unserialize
153
- d.children[0].set_as_root!
154
- d.children[0]
155
- end
156
- end
157
- end
@@ -1,33 +0,0 @@
1
- module Treat
2
- # Clusters together groups of algorithms that
3
- # perform similar functions.
4
- module Category
5
- # Require the Group class.
6
- require 'treat/group'
7
- # Add workers to the Entities based on the
8
- # configuration for a given category.
9
- def self.extended(category)
10
- Treat::Categories.list << category
11
- category.module_eval do
12
- groups.each do |group|
13
- group = const_get(group)
14
- group.targets.each do |entity_type|
15
- entity = Treat::Entities.const_get(cc(entity_type))
16
- entity.class_eval { add_workers group }
17
- end
18
- end
19
- end
20
- end
21
- # Provides a list of groups within this category.
22
- def groups; self.constants; end
23
- # Provide a list of methods implemented in
24
- # the groups contained within this category.
25
- def methods
26
- methods = []
27
- groups.each do |group|
28
- methods << const_get(group).method
29
- end
30
- methods
31
- end
32
- end
33
- end
@@ -1,116 +0,0 @@
1
- module Treat
2
- # Makes a class delegatable, allowing calls on it to be forwarded
3
- # to a worker class performing the appropriate call.
4
- module Delegatable
5
- # Add preset methods to entities.
6
- def add_presets(group)
7
- group.presets.each do |preset_m, presets|
8
- define_method(preset_m) do |worker=nil, options={}|
9
- options = presets.merge(options)
10
- m = group.method
11
- send(m, worker, options)
12
- features[preset_m] = unset(m)
13
- end
14
- end
15
- end
16
- def add_preprocessors(group)
17
- group.preprocessors.each do |preprocessor_m, block|
18
- define_method(preprocessor_m) do |worker=nil, options={}|
19
- block.call(self, worker, options)
20
- features[preprocessor_m] = unset(group.method)
21
- end
22
- end
23
- end
24
- # Add postprocessor methods to entities.
25
- def add_postprocessors(group, m)
26
- group.postprocessors.each do |postprocessor_m, block|
27
- define_method(postprocessor_m) do |worker=nil, options={}|
28
- options[:postprocessor] = postprocessor_m
29
- send(m, worker, options)
30
- end
31
- end
32
- end
33
- # Add worker group to all entities of a class.
34
- def add_workers(group)
35
- # Define each method in group.
36
- self.class_eval do
37
- m = group.method
38
- add_presets(group)
39
- add_preprocessors(group)
40
- add_postprocessors(group, m)
41
- define_method(m) do |worker=nil, options={}|
42
- postprocessor =
43
- options.delete(:postprocessor)
44
- if !@features[m].nil?
45
- @features[m]
46
- else
47
- self.class.call_worker(
48
- self, m, worker,
49
- postprocessor,
50
- group, options
51
- )
52
- end
53
- end
54
- end
55
- end
56
- # Call a worker.
57
- def call_worker(entity, m, worker, postprocessor, group, options)
58
- if worker.nil? || worker == :default
59
- worker = find_worker(entity, group)
60
- end
61
- if not group.list.include?(worker)
62
- raise Treat::Exception, worker_not_found(worker, group)
63
- else
64
- worker_klass = group.const_get(cc(worker.to_s).intern)
65
- result = entity.accept(group, worker_klass, m, options)
66
- if postprocessor
67
- result = group.postprocessors[postprocessor].call(entity, result)
68
- end
69
- if group.type == :annotator
70
- f = postprocessor.nil? ? m : postprocessor
71
-
72
- entity.features[f] = result unless result == nil
73
- end
74
- result
75
- end
76
- end
77
- # Get the default worker for that language
78
- # inside the given group.
79
- def find_worker_for_language(language, group)
80
- lang = Treat::Languages.describe(language)
81
- lclass = cc(lang).intern
82
- if Treat::Languages.constants.include?(lclass)
83
- cat = group.to_s.split('::')[-2].intern
84
- lclass = Treat::Languages.get(lclass).const_get(cat)
85
- g = ucc(cl(group)).intern
86
- if !lclass[g] || !lclass[g][0]
87
- d = ucc(cl(group))
88
- d.gsub!('_', ' ')
89
- d = 'worker to find "' + d
90
- raise Treat::Exception, "No #{d}" +
91
- "\" is available for the #{lang} language."
92
- end
93
- return lclass[g][0]
94
- else
95
- raise Treat::Exception,
96
- "Language '#{lang}' is not supported (yet)."
97
- end
98
- end
99
- # Get which worker to use if none has been supplied.
100
- def find_worker(entity, group)
101
- worker = group.default.nil? ?
102
- self.find_worker_for_language(entity.language, group) :
103
- group.default
104
- if worker == :none
105
- raise Treat::Exception,
106
- "There is intentionally no default worker for #{group}."
107
- end
108
- worker
109
- end
110
- # Return an error message and suggest possible typos.
111
- def worker_not_found(klass, group)
112
- "Algorithm '#{ucc(cl(klass))}' couldn't be found in group #{group}." +
113
- did_you_mean?(group.list.map { |c| ucc(c) }, ucc(klass))
114
- end
115
- end
116
- end