treat 2.0.3 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. data/lib/treat/config/data/languages/agnostic.rb +6 -3
  2. data/lib/treat/config/data/languages/english.rb +1 -1
  3. data/lib/treat/config/data/workers/extractors.rb +8 -0
  4. data/lib/treat/loaders/stanford.rb +2 -0
  5. data/lib/treat/version.rb +1 -1
  6. data/lib/treat/workers/extractors/distance/levenshtein.rb +35 -0
  7. data/lib/treat/workers/extractors/name_tag/stanford.rb +4 -1
  8. data/lib/treat/workers/extractors/similarity/jaro_winkler.rb +38 -0
  9. data/lib/treat/workers/extractors/similarity/tf_idf.rb +19 -3
  10. data/lib/treat/workers/extractors/time/chronic.rb +6 -41
  11. data/lib/treat/workers/extractors/time/kronic.rb +20 -0
  12. data/lib/treat/workers/extractors/time/nickel.rb +0 -15
  13. data/lib/treat/workers/extractors/time/ruby.rb +2 -33
  14. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +11 -10
  15. data/lib/treat/workers/processors/parsers/stanford.rb +60 -112
  16. data/spec/entities/collection.rb +29 -25
  17. data/spec/entities/document.rb +45 -44
  18. data/spec/entities/entity.rb +295 -294
  19. data/spec/entities/phrase.rb +21 -17
  20. data/spec/entities/token.rb +43 -40
  21. data/spec/entities/word.rb +5 -1
  22. data/spec/entities/zone.rb +26 -22
  23. data/spec/helper.rb +7 -2
  24. data/spec/learning/data_set.rb +145 -141
  25. data/spec/learning/export.rb +46 -42
  26. data/spec/learning/problem.rb +114 -110
  27. data/spec/learning/question.rb +46 -42
  28. data/spec/treat.rb +41 -37
  29. data/spec/workers/agnostic.rb +2 -2
  30. data/spec/workers/english.rb +12 -12
  31. metadata +7 -8
  32. data/files/21552208.html +0 -786
  33. data/files/nethttp-cheat-sheet-2940.html +0 -393
  34. data/lib/treat/workers/extractors/similarity/levenshtein.rb +0 -36
  35. data/spec/sandbox.rb +0 -294
  36. data/spec/workers/examples/english/mathematicians/euler.html +0 -21
@@ -1,393 +0,0 @@
1
- <!doctype html>
2
-
3
- <!--[if lt IE 7 ]> <html lang="en" class="ie6"> <![endif]-->
4
- <!--[if IE 7 ]> <html lang="en" class="ie7"> <![endif]-->
5
- <!--[if IE 8 ]> <html lang="en" class="ie8"> <![endif]-->
6
- <!--[if IE 9 ]> <html lang="en" class="ie9"> <![endif]-->
7
- <!--[if (gt IE 9)|!(IE)]><!--> <html lang="en" class="no-js"> <!--<![endif]-->
8
-
9
- <head>
10
- <meta charset="utf-8">
11
- <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
12
- <title>Net::HTTP Cheat Sheet</title>
13
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
14
- <link rel="icon" href="/favicon.png" type="image/png">
15
- <link rel="shortcut icon" href="/favicon.png" type="image/png">
16
- <link rel="alternate" type="application/rss+xml" title="Ruby Inside" href="http://www.rubyinside.com/feed/" />
17
- <link rel="stylesheet" href="http://www.rubyinside.com/wp-content/themes/ri2011/css/ri.css">
18
- <!-- script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.4.1/jquery.min.js"></script -->
19
- <script type="text/javascript" src="http://use.typekit.com/dmj7czx.js"></script>
20
- <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
21
- <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.7.0/jquery.min.js"></script>
22
-
23
- <!--[if lt IE 9]>
24
- <script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script>
25
- <![endif]-->
26
- </head>
27
-
28
- <body class="single single-post postid-2940 single-format-standard">
29
- <div id="superheader">Want to stay on top? <a href="http://rubyweekly.com/?m">Ruby Weekly</a> is a once-weekly e-mail newsletter covering the latest Ruby and Rails news.</div>
30
- <div id="container">
31
- <div class="outerheader top">
32
- <div class="right"><form method="get" id="searchform" action="/"><input type="text" value="" name="s" id="s" size="24" /><input type="submit" id="searchsubmit" value="Search" /></form>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href="http://www.rubyinside.com/feed/"><img src="http://feeds2.feedburner.com/~fc/RubyInside?bg=99CCFF&fg=333333&anim=0" align="top" alt="Feed Icon" /></a></div>
33
- <a href="/"><img src="http://www.rubyinside.com/wp-content/themes/ri2011/css/logo.png" id="logo" alt="Ruby Inside - A Ruby Blog" /></a>
34
- </div>
35
-
36
- <div class="outerheader masthead">
37
- <div class="menu">
38
- <ul>
39
- <li><a href="/" class="home">Home</a></li> <li><a href="/about/">About</a></li>
40
- <li><a href="/archives/">Archives</a></li>
41
- <!-- li><a href="/advertise/">Advertising</a></li -->
42
- <li><a href="http://jobs.rubyinside.com/">Ruby and Rails Jobs</a></li>
43
- <li class="right"><a href="/feed/">RSS</a> <!-- span class="count">(24146)</span --></li>
44
- <li class="right">
45
- </li>
46
- <li class="right"><a href="http://twitter.com/rubyinside">Follow us on Twitter</a> <!-- span class="count">(5548)</span --></li>
47
- </ul>
48
- </div>
49
-
50
-
51
- </div>
52
-
53
- <div id="innercontainer"><div id="page">
54
-
55
-
56
- <div id="sidebar">
57
- <div style="margin-top: 12px"><a href="http://twitter.com/RubyInside" class="twitter-follow-button">Follow @RubyInside</a>
58
- <script src="http://platform.twitter.com/widgets.js" type="text/javascript"></script></div>
59
- <div id="execphp-3" class="widget-container section widget_execphp">
60
- <div class="execphpwidget"> <h3><a href="http://jobs.rubyinside.com/">Ruby and Rails Jobs</a></h3>
61
- <div class="inner">
62
- <ul>
63
- <!-- ? readfile('http://www.rubyinside.com/jobs.html'); ? -->
64
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/778380">Front-End Engineer</a></div><div class='company'>New Relic</div><div class="location">San Francisco, California</div></li>
65
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/780028">Ruby on Rails Developer @WeedMaps</a></div><div class='company'>WeedMaps</div><div class="location">Denver, Colorado</div></li>
66
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/766252">Ruby on Rails Developer</a></div><div class='company'>CSD</div><div class="location">Austin, Texas</div></li>
67
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/776046">Front-End Developer</a></div><div class='company'>Bedrocket Media Ventures</div><div class="location">New York, New York</div></li>
68
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/769030">Telecommute-Principal Ruby Consultant</a></div><div class='company'>IMPRTL Inc</div><div class="location">Chicago, Illinois</div></li>
69
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/772705">Senior Rails Developer</a></div><div class='company'>Lightspeed Systems</div><div class="location">Austin, Texas</div></li>
70
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/776045">Lead Software Developer</a></div><div class='company'>Bedrocket Media Ventures</div><div class="location">New York, New York</div></li>
71
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/771110">Gifted RoR Software Engineer Technologist</a></div><div class='company'>Chelsmore Apartments</div><div class="location">New York, New York</div></li>
72
- <div class="minor"><a href="http://ruby.jobamatic.com/">More jobs &raquo;</a> : <a href="/post-a-job">Post a Job</a></div>
73
- </div>
74
- </div>
75
- </div><div id="text-3" class="widget-container section widget_text"> <div class="textwidget"><!-- a href="http://owningrails.com/?ref=5905208113"><img src="http://owningrails.com/images/ad-with.png" /></a --><!-- a href="http://rubyweekly.com/"><img src="http://rubyinside.com/wp-content/themes/ri20102/images/rw-ri-box.gif" /></a --><!-- a href="https://cooperpress.com/19walkthrough"><img src="/images/ruby19wt2.gif" /></a --><!-- a href="https://cooperpress.com/rubyreloaded"><img src="http://rubyinside.com/images/reloaded-riad.png" /></a --></div>
76
- </div>
77
-
78
- <!-- if (is_front_page() && !is_paged()) { -->
79
- </div>
80
-
81
- <div id="main">
82
-
83
-
84
-
85
- <div class="post-2940 post type-post status-publish format-standard hentry category-reference" id="post-2940">
86
-
87
- <div class="content">
88
-
89
- <div class="title"><h2>Net::HTTP Cheat Sheet</h2></div>
90
-
91
- <p class="author">By <a href="http://www.rubyinside.com/author/admin" title="View all posts by Peter Cooper">Peter Cooper</a> <span class="date">/ January 16, 2010</span></p>
92
- <div id="thecontent"><p><img src="http://www.rubyinside.com/wp-content/uploads/2010/01/http-is-a-hamster-on-rollerblades-says-marc-andre-cournoyer.gif" width="120" height="120" alt="http-is-a-hamster-on-rollerblades-says-marc-andre-cournoyer.gif" style="float:left; margin-right:12px; margin-bottom:12px; border:1px #000000 solid;" />Norwegian Rubyist <a href="http://august.lilleaas.net/">August Lilleaas</a> has been busy putting together <a href="http://github.com/augustl/net-http-cheat-sheet">a ton of examples</a> of using the <a href="http://ruby-doc.org/stdlib/libdoc/net/http/rdoc/index.html">Net::HTTP</a> Ruby library that comes with most Ruby distributions. I asked him if it'd be okay to put some of them directly on Ruby Inside for reference purposes and he said "No problem!"</p>
93
- <p>It's worth noting that Net::HTTP has been superseded in many areas by libraries like John Nunemaker's <a href="http://github.com/jnunemaker/httparty">HTTParty</a> and Paul DIx's high performance <a href="http://github.com/pauldix/typhoeus">Typhoeus</a>, but as part of the standard library, Net::HTTP is still a popular option though it doesn't have the easiest API to remember.</p>
94
- <p>Here's a selection of August's examples for some of the most common operations. Want to see <i>all</i> of the examples and follow any updates made to them? Check out August's <a href="http://github.com/augustl/net-http-cheat-sheet">net-http-cheat-sheet GitHub repo</a>.</p>
95
- <h3>Standard HTTP Request</h3>
96
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
97
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
98
-
99
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
100
-
101
- <span class="comment"># Shortcut</span>
102
- <span class="ident">response</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">get_response</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">)</span>
103
-
104
- <span class="comment"># Will print response.body</span>
105
- <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">get_print</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">)</span>
106
-
107
- <span class="comment"># Full</span>
108
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
109
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">))</span></pre>
110
- <h3>Basic Auth</h3>
111
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
112
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
113
-
114
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
115
-
116
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
117
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
118
- <span class="ident">request</span><span class="punct">.</span><span class="ident">basic_auth</span><span class="punct">("</span><span class="string">username</span><span class="punct">",</span> <span class="punct">"</span><span class="string">password</span><span class="punct">")</span>
119
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
120
- <h3>Dealing with response objects</h3>
121
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
122
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
123
-
124
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
125
-
126
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
127
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
128
-
129
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
130
-
131
- <span class="ident">response</span><span class="punct">.</span><span class="ident">code</span> <span class="comment"># => 301</span>
132
- <span class="ident">response</span><span class="punct">.</span><span class="ident">body</span> <span class="comment"># => The body (HTML, XML, blob, whatever)</span>
133
- <span class="comment"># Headers are lowercased</span>
134
- <span class="ident">response</span><span class="punct">["</span><span class="string">cache-control</span><span class="punct">"]</span> <span class="comment"># => public, max-age=2592000</span></pre>
135
- <h3>POST form request</h3>
136
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
137
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
138
-
139
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://example.com/search</span><span class="punct">")</span>
140
-
141
- <span class="comment"># Shortcut</span>
142
- <span class="ident">response</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">post_form</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">,</span> <span class="punct">{"</span><span class="string">q</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">My query</span><span class="punct">",</span> <span class="punct">"</span><span class="string">per_page</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">50</span><span class="punct">"})</span>
143
-
144
- <span class="comment"># Full control</span>
145
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
146
-
147
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
148
- <span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">q</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">My query</span><span class="punct">",</span> <span class="punct">"</span><span class="string">per_page</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">50</span><span class="punct">"})</span>
149
-
150
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
151
- <h3>File upload - input type="file" style</h3>
152
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
153
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
154
-
155
- <span class="comment"># Token used to terminate the file in the post body. Make sure it is not</span>
156
- <span class="comment"># present in the file you're uploading.</span>
157
- <span class="constant">BOUNDARY</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">AaB03x</span><span class="punct">"</span>
158
-
159
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://something.com/uploads</span><span class="punct">")</span>
160
- <span class="ident">file</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">/path/to/your/testfile.txt</span><span class="punct">"</span>
161
-
162
- <span class="ident">post_body</span> <span class="punct">=</span> <span class="punct">[]</span>
163
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string">--<span class="expr">#{BOUNDARY}</span><span class="escape">rn</span></span><span class="punct">"</span>
164
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string">Content-Disposition: form-data; name=<span class="escape">"</span>datafile<span class="escape">"</span>; filename=<span class="escape">"</span><span class="expr">#{File.basename(file)}</span><span class="escape">"rn</span></span><span class="punct">"</span>
165
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string">Content-Type: text/plain<span class="escape">rn</span></span><span class="punct">"</span>
166
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string"><span class="escape">rn</span></span><span class="punct">"</span>
167
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span><span class="ident">file</span><span class="punct">)</span>
168
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string"><span class="escape">rn</span>--<span class="expr">#{BOUNDARY}</span>--<span class="escape">rn</span></span><span class="punct">"</span>
169
-
170
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
171
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
172
- <span class="ident">request</span><span class="punct">.</span><span class="ident">body</span> <span class="punct">=</span> <span class="ident">post_body</span><span class="punct">.</span><span class="ident">join</span>
173
- <span class="ident">request</span><span class="punct">["</span><span class="string">Content-Type</span><span class="punct">"]</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">multipart/form-data, boundary=<span class="expr">#{BOUNDARY}</span></span><span class="punct">"</span>
174
-
175
- <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></span></span></span></span></span></span></pre>
176
- <h3>SSL/HTTPS request</h3>
177
- <p><strong>Update: There are some good reasons why this code example is bad. It introduces a potential security vulnerability if it's essential you use the server certificate to verify the identity of the server you're connecting to. There's <a href="http://www.rubyinside.com/how-to-cure-nethttps-risky-default-https-behavior-4010.html">a fix for the issue though!</a></strong></p>
178
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/https</span><span class="punct">"</span>
179
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
180
-
181
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">https://secure.com/</span><span class="punct">")</span>
182
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
183
- <span class="ident">http</span><span class="punct">.</span><span class="ident">use_ssl</span> <span class="punct">=</span> <span class="constant">true</span>
184
- <span class="ident">http</span><span class="punct">.</span><span class="ident">verify_mode</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">SSL</span><span class="punct">::</span><span class="constant">VERIFY_NONE</span>
185
-
186
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
187
-
188
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
189
- <span class="ident">response</span><span class="punct">.</span><span class="ident">body</span>
190
- <span class="ident">response</span><span class="punct">.</span><span class="ident">status</span>
191
- <span class="ident">response</span><span class="punct">["</span><span class="string">header-here</span><span class="punct">"]</span> <span class="comment"># All headers are lowercase</span></pre>
192
- <h3>SSL/HTTPS request with PEM certificate</h3>
193
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/https</span><span class="punct">"</span>
194
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
195
-
196
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">https://secure.com/</span><span class="punct">")</span>
197
- <span class="ident">pem</span> <span class="punct">=</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">("</span><span class="string">/path/to/my.pem</span><span class="punct">")</span>
198
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
199
- <span class="ident">http</span><span class="punct">.</span><span class="ident">use_ssl</span> <span class="punct">=</span> <span class="constant">true</span>
200
- <span class="ident">http</span><span class="punct">.</span><span class="ident">cert</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">X509</span><span class="punct">::</span><span class="constant">Certificate</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">pem</span><span class="punct">)</span>
201
- <span class="ident">http</span><span class="punct">.</span><span class="ident">key</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">PKey</span><span class="punct">::</span><span class="constant">RSA</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">pem</span><span class="punct">)</span>
202
- <span class="ident">http</span><span class="punct">.</span><span class="ident">verify_mode</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">SSL</span><span class="punct">::</span><span class="constant">VERIFY_PEER</span>
203
-
204
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span></pre>
205
- <h3>REST methods</h3>
206
- <pre><span class="comment"># Basic REST.</span>
207
- <span class="comment"># Most REST APIs will set semantic values in response.body and response.code.</span>
208
- <span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
209
-
210
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">api.restsite.com</span><span class="punct">")</span>
211
-
212
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users</span><span class="punct">")</span>
213
- <span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">users[login]</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">quentin</span><span class="punct">"})</span>
214
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
215
- <span class="comment"># Use nokogiri, hpricot, etc to parse response.body.</span>
216
-
217
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
218
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
219
- <span class="comment"># As with POST, the data is in response.body.</span>
220
-
221
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Put</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
222
- <span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">users[login]</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">changed</span><span class="punct">"})</span>
223
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
224
-
225
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Delete</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
226
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
227
- <p>There are more in August's repo if you want to keep browsing..</p>
228
- <p style="background-color: #ffc; font-weight: bold; font-size: 13px; color: #000;">Job: New Relic is <a href="http://ruby.jobamatic.com/a/jbb/job-details/165476">looking for a Ruby on Rails developer in Portland, Oregon.</a></p>
229
- </div>
230
-
231
-
232
-
233
-
234
- <!-- div style="margin-bottom: 8px; background-color: #ffc; text-align: center; padding: 6px"><a href="http://zfer.us/EKm97" style="text-decoration: none; margin: 0; padding: 0" rel="nofollow"><img src="http://www.rubyinside.com/images/railstutorial-box.gif" /></a></div -->
235
- <!-- <a href="http://www.rubyinside.com/19walkthrough/"><img src="http://www.rubyinside.com/images/19pro.gif" /></a> -->
236
-
237
-
238
-
239
-
240
-
241
- </div>
242
- </div>
243
-
244
- <div id="related"><h3>Related Posts</h3><ul><li><a href='http://www.rubyinside.com/cheat-sheet-for-rest-on-rails-261.html' rel='bookmark' title='Cheat Sheet for REST on Rails'>Cheat Sheet for REST on Rails</a></li>
245
- <li><a href='http://www.rubyinside.com/quick-ruby-reference-cheat-sheet-47.html' rel='bookmark' title='Quick Ruby Reference / Cheat Sheet'>Quick Ruby Reference / Cheat Sheet</a></li>
246
- <li><a href='http://www.rubyinside.com/ruby-on-rails-testing-cheat-sheet-206.html' rel='bookmark' title='Ruby on Rails Testing Cheat Sheet'>Ruby on Rails Testing Cheat Sheet</a></li>
247
- </ul></div>
248
- <div id="commentzone">
249
-
250
-
251
-
252
- <h3 id="comments-title">Comments</h3>
253
-
254
- <ol class="commentlist">
255
- <li class="comment even thread-even depth-1" id="comment-40542">
256
- <img alt='' src='http://0.gravatar.com/avatar/6268c7528d855f1cef5696a00d159909?s=64&amp;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&amp;r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://twitter.com/peterc' rel='external nofollow' class='url'>Peter Cooper</a> says:</cite><br />
257
-
258
- <small class="commentmetadata">January 16, 2010 at 1:02 am</small>
259
-
260
-
261
- <p>The idea for the hamster on rollerskates issue can be discovered by checking out its filename.. :-)</p>
262
-
263
-
264
- </li> <li class="comment odd alt thread-odd thread-alt depth-1" id="comment-40546">
265
- <img alt='' src='http://0.gravatar.com/avatar/aa31b79adedc3f60547769f1a8971ba6?s=64&amp;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&amp;r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://emmanueloga@gmail.com' rel='external nofollow' class='url'>Emmanuel</a> says:</cite><br />
266
-
267
- <small class="commentmetadata">January 16, 2010 at 3:50 pm</small>
268
-
269
-
270
- <p>Anybody knows if the HTTP::Net issues related with Timeout is still present in all (or any) ruby versions?</p>
271
- <p><a href="http://blog.headius.com/2008/02/rubys-threadraise-threadkill-timeoutrb.html" rel="nofollow">http://blog.headius.com/2008/02/rubys-threadraise-threadkill-timeoutrb.html</a></p>
272
-
273
-
274
- </li> <li class="comment even thread-even depth-1" id="comment-40550">
275
- <img alt='' src='http://0.gravatar.com/avatar/2bfc6436d28fc4a224e3ff1702a046d0?s=64&amp;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&amp;r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite>ratbeard says:</cite><br />
276
-
277
- <small class="commentmetadata">January 17, 2010 at 4:23 pm</small>
278
-
279
-
280
- <p>Another library that abstracts over Net::HTTP is Adam Wiggin's 'rest-client' library. It seems more targeted toward single requests (i.e. Restclient.get 'google.com') than mixing in and building a request class like HTTParty, though both libraries do variants of each style. It has a great interactive shell and request logging.</p>
281
- <p><a href="http://github.com/archiloque/rest-client/" rel="nofollow">http://github.com/archiloque/rest-client/</a></p>
282
- <p>I **highly** recommend another library authored by Mr. Wiggins that abstracts over the messy file system api, 'rush'. I'm really surprised that more libraries and apps that have a non-trivial amount of file system code don't use it.</p>
283
- <p><a href="http://rush.heroku.com/" rel="nofollow">http://rush.heroku.com/</a></p>
284
- <p>The code is very clean in both, I tip my hat to you Mr. Wiggins.</p>
285
-
286
-
287
- </li> <li class="comment odd alt thread-odd thread-alt depth-1" id="comment-40556">
288
- <img alt='' src='http://0.gravatar.com/avatar/e61f142f400df8299d37c2bce09e3478?s=64&amp;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&amp;r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://august.lilleaas.net/' rel='external nofollow' class='url'>August Lilleaas</a> says:</cite><br />
289
-
290
- <small class="commentmetadata">January 19, 2010 at 10:19 am</small>
291
-
292
-
293
- <p>I always use one of these libraries when I can (rest-client, httparty, ...). In some cases you'd be better off without dependencies, though, such as in small shell scripts, etc.</p>
294
-
295
-
296
- </li> </ol>
297
-
298
-
299
-
300
- </div>
301
-
302
- <h3>Other Posts to Enjoy</h3>
303
- <div class="widget_featured-posts noborder four">
304
- <ul class="clearfix"><li>
305
-
306
- <a href="http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2012/11/spaced.png&amp;h=73&amp;w=73&amp;zc=1" class="alignleft" alt="The Split is Not Enough: Unicode Whitespace Shenigans for Rubyists" /></a>
307
- <h4 class="featured-title"><a href="http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html">The Split is Not Enough: Unicode Whitespace Shenigans for Rubyists</a></h4>
308
- </li>
309
- <li>
310
-
311
- <a href="http://www.rubyinside.com/mega-february-2012-ruby-news-5815.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2012/03/mega.png&amp;h=73&amp;w=73&amp;zc=1" class="alignleft" alt="The Mega Ruby News and Release Roundup for February 2012" /></a>
312
- <h4 class="featured-title"><a href="http://www.rubyinside.com/mega-february-2012-ruby-news-5815.html">The Mega Ruby News and Release Roundup for February 2012</a></h4>
313
- </li>
314
- <li>
315
-
316
- <a href="http://www.rubyinside.com/sinatra-book-review-5704.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2011/12/sinatra-up.jpeg&amp;h=73&amp;w=73&amp;zc=1" class="alignleft" alt="A Lagom Review of O&#8217;Reilly&#8217;s &#8216;Sinatra Up and Running&#8217;" /></a>
317
- <h4 class="featured-title"><a href="http://www.rubyinside.com/sinatra-book-review-5704.html">A Lagom Review of O&#8217;Reilly&#8217;s &#8216;Sinatra Up and Running&#8217;</a></h4>
318
- </li>
319
- <li>
320
-
321
- <a href="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2011/11/unary.gif&amp;h=73&amp;w=73&amp;zc=1" class="alignleft" alt="Ruby&#8217;s Unary Operators and How to Define Their Functionality" /></a>
322
- <h4 class="featured-title"><a href="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html">Ruby&#8217;s Unary Operators and How to Define Their Functionality</a></h4>
323
- </li>
324
- </ul> </div>
325
-
326
-
327
-
328
-
329
-
330
- <h3>Twitter Mentions</h3>
331
- <div id="boastful"></div>
332
-
333
- <div class="previousnext">
334
- <div class="next"><a href="http://www.rubyinside.com/this-weeks-ruby-news-rspec-2-8-0-rc1-minitest-2-8-0-and-whats-new-in-bundler-1-1-5637.html" rel="next">Next Post &raquo;</a></div>
335
- <div class="previous"><a href="http://www.rubyinside.com/the-ruby-standard-library-to-be-converted-to-gems-for-ruby-2-0-5586.html" rel="prev">&laquo; Previous Post</a></div>
336
- </div>
337
-
338
- <!-- <h3>Want to get up to speed with Ruby 1.9?</h3>
339
-
340
- <p><a href="http://www.rubyinside.com/19walkthrough/"><img src="http://www.rubyinside.com/images/19pro.gif" /></a> </p>
341
- -->
342
-
343
-
344
- </div>
345
-
346
- </div>
347
-
348
- </div> <!-- inner -->
349
-
350
-
351
-
352
-
353
- </div> <!-- container -->
354
-
355
-
356
- <div id="footer">
357
- <div class="inner">
358
- <p>Copyright &copy; 2006&ndash;2012 <a href="http://twitter.com/peterc">Peter Cooper</a></p>
359
- </div>
360
- </div>
361
-
362
- <script type="text/javascript" src="http://engine.rubyrow.net/z/1313/adzerk1_4_16_19,adzerk2_4_16_19,adzerk3_4_16_19,adzerk4_4_16_19,adzerk5_4_16_19,adzerk6_4_16_19,adzerk7_4_16_19,adzerk8_4_16_19"></script>
363
- <script type="text/javascript" src="http://www.rubyinside.com/wp-content/themes/ri2011/jquery.boastful.js"></script>
364
-
365
- <script type="text/javascript">
366
- $(document).ready(function() {
367
- $('#boastful').boastful();
368
- });
369
- </script>
370
-
371
- <script type="text/javascript">
372
- var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
373
- document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
374
- </script>
375
- <script type="text/javascript">
376
- var pageTracker = _gat._getTracker("UA-2237791-3");
377
- pageTracker._initData();
378
- pageTracker._trackPageview();
379
- </script>
380
-
381
-
382
- </body>
383
- </html>
384
-
385
- <!-- div style="float:right"><a href="http://twitter.com/share" class="twitter-share-button" data-url="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html" data-counturl="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html" data-text="Ruby&#8217;s Unary Operators and How to Define Their Functionality" data-count="horizontal">Tweet</a></div -->
386
- <!-- div style="margin-bottom: -12px; margin-top: -10px"><iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fwww.rubyinside.com%2Frubys-unary-operators-and-how-to-redefine-their-functionality-5610.html&amp;layout=standard&amp;show_faces=false&amp;width=420&amp;action=like&amp;colorscheme=light&amp;height=26" scrolling="no" frameborder="0" style="border:none; overflow:hidden; width:420px; height:26px;" allowTransparency="true"></iframe></div -->
387
- <!-- div><script src="http://connect.facebook.net/en_US/all.js#xfbml=1"></script><fb:like href="http://x.com/" show_faces="false" width="450"></fb:like></div -->
388
-
389
-
390
-
391
- <!-- Dynamic page generated in 0.233 seconds. -->
392
- <!-- Cached page generated by WP-Super-Cache on 2013-01-04 01:31:41 -->
393
- <!-- super cache -->
@@ -1,36 +0,0 @@
1
- class Treat::Workers::Extractors::Similarity
2
- # Default options.
3
- DefaultOptions = {
4
- with: '',
5
- ins_cost: 1,
6
- del_cost: 1,
7
- sub_cost: 1
8
- }
9
- # Return the levensthein distance between
10
- # two strings taking into account the costs
11
- # of insertion, deletion, and substitution.
12
- # Used by did_you_mean? to detect typos.
13
- def self.similarity(entity, options)
14
- first, other = entity.to_s, options[:with].to_s
15
- options = DefaultOptions.merge(options)
16
- other, ins, del, sub, = options[:with],
17
- options[:inst_cost], options[:del_cost],
18
- options[:sub_cost]
19
- fill, dm = [0] * (first.length - 1).abs,
20
- [(0..first.length).collect { |i| i * ins}]
21
- for i in 1..other.length
22
- dm[i] = [i * del, fill.flatten]
23
- end
24
- for i in 1..other.length
25
- for j in 1..first.length
26
- dm[i][j] = [
27
- dm[i-1][j-1] + (first[i-1] ==
28
- other[i-1] ? 0 : sub), dm[i][j-1] +
29
- ins, dm[i-1][j] + del
30
- ].min
31
- end
32
- end
33
- dm[other.length][first.length]
34
- end
35
-
36
- end
data/spec/sandbox.rb DELETED
@@ -1,294 +0,0 @@
1
- # encoding: utf-8
2
- require_relative '../lib/treat'
3
-
4
- Treat.databases.mongo.db = 'treat_test'
5
- Treat.libraries.stanford.model_path =
6
- '/ruby/stanford-core-nlp-minimal/models/'
7
- Treat.libraries.stanford.jar_path =
8
- '/ruby/stanford-core-nlp-minimal/bin/'
9
- Treat.libraries.punkt.model_path =
10
- '/ruby/punkt/models/'
11
- Treat.libraries.reuters.model_path =
12
- '/ruby/reuters/models/'
13
-
14
- # include Treat::Core::DSL
15
-
16
- Treat::Builder.new do
17
- s = sentence "Hello, world!"
18
- s.print_tree
19
- end
20
-
21
- p = paragraph('A walk in the park. A trip on a boat.').segment
22
- p.visualize :dot, file: 'test.dot'
23
- =begin
24
-
25
- g = group("I was running")
26
- puts g.tag.inspect
27
-
28
- Treat.libraries.stanford.jar_path = '/ruby/treat/bin/'
29
- Treat.libraries.stanford.model_path = '/ruby/treat/models/'
30
-
31
- p = paragraph
32
- s = sentence
33
- w = word
34
-
35
- p = phrase 'hello world'
36
- e = email 'louis@gmail.com'
37
-
38
- d = question(:is_feature, :word)
39
- =end
40
- #d = document Treat.paths.spec + 'workers/examples/english/economist/hungarys_troubles.txt'
41
- #d.apply :chunk, :segment, :tokenize, :tag, :category, :name_tag
42
- #d.print_tree
43
- #d = document Treat.paths.spec + 'workers/examples/english/economist/saving_the_euro.odt'
44
- #d.print_tree
45
- =begin
46
- d = document 'test.htm'
47
- d.apply :chunk
48
- #d.serialize :yaml, file: 'test444.yaml'
49
- d.set :test, 2
50
- d.serialize :mongo, db: 'test'
51
- d.set :test, 3
52
- d.serialize :mongo, db: 'test'
53
- d.apply :segment, :tokenize, :tag, :category
54
- puts d.verb_count
55
- #d2 = document id: d.id, db: 'test'
56
- d2 = document 'features.test' => 3, db: 'test'
57
- d2.apply :segment, :tokenize, :tag, :category
58
- puts d2.verb_count
59
- #d.print_tree
60
- #s = document 'http://www.economist.com'
61
-
62
- p = phrase 'hello', 'world', '!'
63
- puts p.to_s
64
- puts p.to_str
65
- =end
66
-
67
- =begin
68
- ### Super basics.
69
- puts p.value
70
-
71
- p << 'bitch'
72
- p << word('hello')
73
- puts p.to_s
74
- puts p.to_str
75
- puts p.value
76
- puts p.to_ary.inspect
77
- =end
78
-
79
- =begin
80
-
81
- ### Configuration
82
-
83
- # A boolean value indicating whether to silence the output of external libraries (e.g. Stanford tools, Enju, LDA, Ruby-FANN) when they are used.
84
- puts Treat.core.verbosity.silence
85
- # A boolean value indicating whether to explain the steps that Treat is performing.
86
- puts Treat.core.verbosity.debug
87
- # A boolean value indicating whether Treat should try to detect the language of newly input text.
88
- puts Treat.core.language.detect
89
- # The language to default to when detection is off.
90
- puts Treat.core.language.default
91
- # A symbol representing the finest level at which language detection should be performed if language detection is turned on.
92
- puts Treat.core.language.detect_at
93
-
94
- # A directory in which to create temporary files.
95
- puts Treat.paths.tmp
96
- # A directory in which to store downloaded files.
97
- puts Treat.paths.files
98
- # A directory containing trained models for various tasks.
99
- puts Treat.paths.models
100
- # A directory containing the spec files.
101
- puts Treat.paths.spec
102
- # A directory containing executables and JAR files.
103
- puts Treat.paths.bin
104
- puts Treat.paths.lib
105
-
106
- # Set up Mongoid.
107
- Treat.databases.mongo.db = 'your_database'
108
- Treat.databases.mongo.host = 'localhost'
109
- Treat.databases.mongo.port = '27017'
110
-
111
- # Transparent string casting.
112
- s = 'inflection'.stem
113
- # is equivalent to
114
- s = 'inflection'.to_entity.stem
115
- # which comes down to
116
- s = word('inflection').stem
117
-
118
- # Transparent number casting.
119
- n = 2.ordinal
120
- # is equivalent to
121
- s = 2.to_entity.ordinal
122
- # which comes down to
123
- s = number(2).ordinal
124
- =end
125
- =begin
126
- ### BASIC USAGE
127
-
128
- # Create a sentence
129
- s = sentence 'Those who dream by day know of at least ' +
130
- '19 things that escape those who dream only at night.'
131
-
132
- # Tokenize and tag it.
133
- s.tokenize.tag
134
-
135
- # View the sentence structure.
136
- s.print_tree
137
-
138
- # Iterate over the tokens.
139
- s.each_token do |tok|
140
- puts tok.value
141
- puts tok.type
142
- end
143
-
144
-
145
-
146
- # Arrays instead of iterators.
147
- (s.nouns + s.adjectives).each do |word|
148
- puts word.synonyms
149
- puts word.antonyms
150
- end
151
-
152
- # Functions on numbers.
153
- s.each_number do |num|
154
- puts num.ordinal
155
- puts num.cardinal
156
- end
157
-
158
- # See all the annotations.
159
- s.each do |tok|
160
- puts tok.inspect
161
- end
162
-
163
- # Lazy way of doing all of the above.
164
- s = sentence 'Those who dream by day know of at least ' +
165
- '19 things that escape those who dream only at night.'
166
-
167
- s.apply :tokenize, :tag, :category,
168
- :stem, :hyponyms, :hypernyms,
169
- :antonyms, :ordinal, :cardinal
170
-
171
- =end
172
-
173
- =begin
174
- ### A BIT MORE ADVANCED USAGE
175
-
176
- section = section "Obama-Sarkozy Meeting\n" +
177
- "Obama and Sarkozy met on January 1st to investigate " +
178
- "the possibility of a new rescue plan. President " +
179
- "Sarkozy is to meet Merkel next Tuesday in Berlin."
180
-
181
- # Chunk: split the titles and paragraphs.
182
- # Segment: perform sentence segmentation.
183
- # Parse: parse the syntax of each sentence.
184
- section.apply :chunk, :segment, :parse
185
-
186
- # View the tree structure.
187
- section.print_tree
188
-
189
- # Get some basic info on the text.
190
- puts section.title
191
- puts section.sentence_count
192
- puts section.word_count
193
-
194
- section.apply :category
195
- puts section.noun_count
196
- puts section.frequency_of 'president'
197
-
198
- section.each_phrase_with_tag('NP') do |phrase|
199
- puts phrase.to_s
200
- end
201
-
202
- =end
203
- =begin
204
- ### URL documents, XML serialization.
205
-
206
- urls = ['http://www.cbc.ca/news/world/story/2012/11/25/snc-lavalin-ben-aissa-charges.html',
207
- 'http://www.cbc.ca/news/world/story/2012/11/25/egypt.html', 'http://www.cbc.ca/news/canada/prince-edward-island/story/2012/11/25/pei-murder-arrest-stlucia.html', 'http://www.cbc.ca/news/world/story/2012/11/25/bangladesh-garment-factory-fire.html']
208
-
209
- c = collection
210
- urls.each { |url| c << document(url) }
211
-
212
- # View the collection.
213
- c.print_tree
214
-
215
- c.apply :chunk, :segment, :tokenize
216
- c.serialize :xml, :file => 'test.xml'
217
-
218
- # Reopen the collection.
219
- c = collection 'test.xml'
220
-
221
- # View it again.
222
- c.print_tree
223
- =end
224
- =begin
225
- include Treat::Core::DSL
226
-
227
- # Show progress bars for download.
228
- Treat.core.verbosity.silence = false
229
- # Explain what Treat is doing.
230
- Treat.core.verbosity.debug = true
231
-
232
- # Define the question "is it junk?" on sentences.
233
- qn = question(:is_junk, :sentence)
234
-
235
- # Frame the problem as depending on punctuation
236
- # count and word count for each sentence.
237
- pb = problem(qn,
238
- feature(:punctuation_count),
239
- feature(:word_count) )
240
-
241
- # Get some web documents to work on.
242
- url1 = 'http://en.wikipedia.org/wiki/NOD_mouse'
243
- url2 = 'http://en.wikipedia.org/wiki/Academic_studies_about_Wikipedia'
244
- d1, d2 = document(url1), document(url2)
245
-
246
- # Process both of our documents.
247
- [d1,d2].apply(:chunk, :segment, :tokenize)
248
-
249
- # Answer our problem to create a training set.
250
- d1.sentences[0..17].each { |s| s.set :is_junk, 0 }
251
- d1.sentences[17..-1].each { |s| s.set :is_junk, 1 }
252
- d_set = d1.export(pb)
253
-
254
- # Define our gold standard results for evaluation.
255
- d2.sentences[0..81].each { |s| s.set :is_true_junk, 0 }
256
- d2.sentences[81..-1].each { |s| s.set :is_true_junk, 1 }
257
-
258
- tp, fp, tn, fn = 0.0, 0.0, 0.0, 0.0
259
-
260
- d2.sentences.map do |s|
261
- pred = s.classify(:id3, training: d_set)
262
- if pred == 1
263
- tp += 1 if s.is_true_junk == 1
264
- fp += 1 if s.is_true_junk == 0
265
- else
266
- tn += 1 if s.is_true_junk == 0
267
- fn += 1 if s.is_true_junk == 1
268
- end
269
- end
270
-
271
- puts "Precision: #{tp/(tp + fp)}"
272
- puts "Recall: #{tp/(tp + fn)}"
273
- =end
274
- =begin
275
- d = document 'http://louismullie.com/susan-text-scan1.jpg'
276
- d.apply :chunk, :segment, :tokenize
277
- d.print_tree
278
- =end
279
- =begin
280
- # Syntax example
281
- phra = phrase 'Obama', 'Sarkozy', 'Meeting'
282
-
283
- para = paragraph 'Obama and Sarkozy met on January 1st to'
284
- 'investigate the possibility of a new rescue plan. Nicolas ' +
285
- 'Sarkozy is to meet Merkel next Tuesday in Berlin.'
286
-
287
- sect = section title(phra), para
288
- =end
289
- =begin
290
- puts "beer".plural.inspect
291
- =end
292
- # Treat.core.language.detect = true
293
- # s = sentence "Du hast deiner Frau einen roten Ring gekauft."
294
- #s.apply(:parse,:category).print_tree