treat 2.0.3 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. data/lib/treat/config/data/languages/agnostic.rb +6 -3
  2. data/lib/treat/config/data/languages/english.rb +1 -1
  3. data/lib/treat/config/data/workers/extractors.rb +8 -0
  4. data/lib/treat/loaders/stanford.rb +2 -0
  5. data/lib/treat/version.rb +1 -1
  6. data/lib/treat/workers/extractors/distance/levenshtein.rb +35 -0
  7. data/lib/treat/workers/extractors/name_tag/stanford.rb +4 -1
  8. data/lib/treat/workers/extractors/similarity/jaro_winkler.rb +38 -0
  9. data/lib/treat/workers/extractors/similarity/tf_idf.rb +19 -3
  10. data/lib/treat/workers/extractors/time/chronic.rb +6 -41
  11. data/lib/treat/workers/extractors/time/kronic.rb +20 -0
  12. data/lib/treat/workers/extractors/time/nickel.rb +0 -15
  13. data/lib/treat/workers/extractors/time/ruby.rb +2 -33
  14. data/lib/treat/workers/lexicalizers/taggers/stanford.rb +11 -10
  15. data/lib/treat/workers/processors/parsers/stanford.rb +60 -112
  16. data/spec/entities/collection.rb +29 -25
  17. data/spec/entities/document.rb +45 -44
  18. data/spec/entities/entity.rb +295 -294
  19. data/spec/entities/phrase.rb +21 -17
  20. data/spec/entities/token.rb +43 -40
  21. data/spec/entities/word.rb +5 -1
  22. data/spec/entities/zone.rb +26 -22
  23. data/spec/helper.rb +7 -2
  24. data/spec/learning/data_set.rb +145 -141
  25. data/spec/learning/export.rb +46 -42
  26. data/spec/learning/problem.rb +114 -110
  27. data/spec/learning/question.rb +46 -42
  28. data/spec/treat.rb +41 -37
  29. data/spec/workers/agnostic.rb +2 -2
  30. data/spec/workers/english.rb +12 -12
  31. metadata +7 -8
  32. data/files/21552208.html +0 -786
  33. data/files/nethttp-cheat-sheet-2940.html +0 -393
  34. data/lib/treat/workers/extractors/similarity/levenshtein.rb +0 -36
  35. data/spec/sandbox.rb +0 -294
  36. data/spec/workers/examples/english/mathematicians/euler.html +0 -21
@@ -1,393 +0,0 @@
1
- <!doctype html>
2
-
3
- <!--[if lt IE 7 ]> <html lang="en" class="ie6"> <![endif]-->
4
- <!--[if IE 7 ]> <html lang="en" class="ie7"> <![endif]-->
5
- <!--[if IE 8 ]> <html lang="en" class="ie8"> <![endif]-->
6
- <!--[if IE 9 ]> <html lang="en" class="ie9"> <![endif]-->
7
- <!--[if (gt IE 9)|!(IE)]><!--> <html lang="en" class="no-js"> <!--<![endif]-->
8
-
9
- <head>
10
- <meta charset="utf-8">
11
- <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
12
- <title>Net::HTTP Cheat Sheet</title>
13
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
14
- <link rel="icon" href="/favicon.png" type="image/png">
15
- <link rel="shortcut icon" href="/favicon.png" type="image/png">
16
- <link rel="alternate" type="application/rss+xml" title="Ruby Inside" href="http://www.rubyinside.com/feed/" />
17
- <link rel="stylesheet" href="http://www.rubyinside.com/wp-content/themes/ri2011/css/ri.css">
18
- <!-- script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.4.1/jquery.min.js"></script -->
19
- <script type="text/javascript" src="http://use.typekit.com/dmj7czx.js"></script>
20
- <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
21
- <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.7.0/jquery.min.js"></script>
22
-
23
- <!--[if lt IE 9]>
24
- <script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script>
25
- <![endif]-->
26
- </head>
27
-
28
- <body class="single single-post postid-2940 single-format-standard">
29
- <div id="superheader">Want to stay on top? <a href="http://rubyweekly.com/?m">Ruby Weekly</a> is a once-weekly e-mail newsletter covering the latest Ruby and Rails news.</div>
30
- <div id="container">
31
- <div class="outerheader top">
32
- <div class="right"><form method="get" id="searchform" action="/"><input type="text" value="" name="s" id="s" size="24" /><input type="submit" id="searchsubmit" value="Search" /></form>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href="http://www.rubyinside.com/feed/"><img src="http://feeds2.feedburner.com/~fc/RubyInside?bg=99CCFF&fg=333333&anim=0" align="top" alt="Feed Icon" /></a></div>
33
- <a href="/"><img src="http://www.rubyinside.com/wp-content/themes/ri2011/css/logo.png" id="logo" alt="Ruby Inside - A Ruby Blog" /></a>
34
- </div>
35
-
36
- <div class="outerheader masthead">
37
- <div class="menu">
38
- <ul>
39
- <li><a href="/" class="home">Home</a></li> <li><a href="/about/">About</a></li>
40
- <li><a href="/archives/">Archives</a></li>
41
- <!-- li><a href="/advertise/">Advertising</a></li -->
42
- <li><a href="http://jobs.rubyinside.com/">Ruby and Rails Jobs</a></li>
43
- <li class="right"><a href="/feed/">RSS</a> <!-- span class="count">(24146)</span --></li>
44
- <li class="right">
45
- </li>
46
- <li class="right"><a href="http://twitter.com/rubyinside">Follow us on Twitter</a> <!-- span class="count">(5548)</span --></li>
47
- </ul>
48
- </div>
49
-
50
-
51
- </div>
52
-
53
- <div id="innercontainer"><div id="page">
54
-
55
-
56
- <div id="sidebar">
57
- <div style="margin-top: 12px"><a href="http://twitter.com/RubyInside" class="twitter-follow-button">Follow @RubyInside</a>
58
- <script src="http://platform.twitter.com/widgets.js" type="text/javascript"></script></div>
59
- <div id="execphp-3" class="widget-container section widget_execphp">
60
- <div class="execphpwidget"> <h3><a href="http://jobs.rubyinside.com/">Ruby and Rails Jobs</a></h3>
61
- <div class="inner">
62
- <ul>
63
- <!-- ? readfile('http://www.rubyinside.com/jobs.html'); ? -->
64
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/778380">Front-End Engineer</a></div><div class='company'>New Relic</div><div class="location">San Francisco, California</div></li>
65
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/780028">Ruby on Rails Developer @WeedMaps</a></div><div class='company'>WeedMaps</div><div class="location">Denver, Colorado</div></li>
66
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/766252">Ruby on Rails Developer</a></div><div class='company'>CSD</div><div class="location">Austin, Texas</div></li>
67
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/776046">Front-End Developer</a></div><div class='company'>Bedrocket Media Ventures</div><div class="location">New York, New York</div></li>
68
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/769030">Telecommute-Principal Ruby Consultant</a></div><div class='company'>IMPRTL Inc</div><div class="location">Chicago, Illinois</div></li>
69
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/772705">Senior Rails Developer</a></div><div class='company'>Lightspeed Systems</div><div class="location">Austin, Texas</div></li>
70
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/776045">Lead Software Developer</a></div><div class='company'>Bedrocket Media Ventures</div><div class="location">New York, New York</div></li>
71
- <li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/771110">Gifted RoR Software Engineer Technologist</a></div><div class='company'>Chelsmore Apartments</div><div class="location">New York, New York</div></li>
72
- <div class="minor"><a href="http://ruby.jobamatic.com/">More jobs &raquo;</a> : <a href="/post-a-job">Post a Job</a></div>
73
- </div>
74
- </div>
75
- </div><div id="text-3" class="widget-container section widget_text"> <div class="textwidget"><!-- a href="http://owningrails.com/?ref=5905208113"><img src="http://owningrails.com/images/ad-with.png" /></a --><!-- a href="http://rubyweekly.com/"><img src="http://rubyinside.com/wp-content/themes/ri20102/images/rw-ri-box.gif" /></a --><!-- a href="https://cooperpress.com/19walkthrough"><img src="/images/ruby19wt2.gif" /></a --><!-- a href="https://cooperpress.com/rubyreloaded"><img src="http://rubyinside.com/images/reloaded-riad.png" /></a --></div>
76
- </div>
77
-
78
- <!-- if (is_front_page() && !is_paged()) { -->
79
- </div>
80
-
81
- <div id="main">
82
-
83
-
84
-
85
- <div class="post-2940 post type-post status-publish format-standard hentry category-reference" id="post-2940">
86
-
87
- <div class="content">
88
-
89
- <div class="title"><h2>Net::HTTP Cheat Sheet</h2></div>
90
-
91
- <p class="author">By <a href="http://www.rubyinside.com/author/admin" title="View all posts by Peter Cooper">Peter Cooper</a> <span class="date">/ January 16, 2010</span></p>
92
- <div id="thecontent"><p><img src="http://www.rubyinside.com/wp-content/uploads/2010/01/http-is-a-hamster-on-rollerblades-says-marc-andre-cournoyer.gif" width="120" height="120" alt="http-is-a-hamster-on-rollerblades-says-marc-andre-cournoyer.gif" style="float:left; margin-right:12px; margin-bottom:12px; border:1px #000000 solid;" />Norwegian Rubyist <a href="http://august.lilleaas.net/">August Lilleaas</a> has been busy putting together <a href="http://github.com/augustl/net-http-cheat-sheet">a ton of examples</a> of using the <a href="http://ruby-doc.org/stdlib/libdoc/net/http/rdoc/index.html">Net::HTTP</a> Ruby library that comes with most Ruby distributions. I asked him if it'd be okay to put some of them directly on Ruby Inside for reference purposes and he said "No problem!"</p>
93
- <p>It's worth noting that Net::HTTP has been superseded in many areas by libraries like John Nunemaker's <a href="http://github.com/jnunemaker/httparty">HTTParty</a> and Paul DIx's high performance <a href="http://github.com/pauldix/typhoeus">Typhoeus</a>, but as part of the standard library, Net::HTTP is still a popular option though it doesn't have the easiest API to remember.</p>
94
- <p>Here's a selection of August's examples for some of the most common operations. Want to see <i>all</i> of the examples and follow any updates made to them? Check out August's <a href="http://github.com/augustl/net-http-cheat-sheet">net-http-cheat-sheet GitHub repo</a>.</p>
95
- <h3>Standard HTTP Request</h3>
96
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
97
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
98
-
99
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
100
-
101
- <span class="comment"># Shortcut</span>
102
- <span class="ident">response</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">get_response</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">)</span>
103
-
104
- <span class="comment"># Will print response.body</span>
105
- <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">get_print</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">)</span>
106
-
107
- <span class="comment"># Full</span>
108
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
109
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">))</span></pre>
110
- <h3>Basic Auth</h3>
111
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
112
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
113
-
114
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
115
-
116
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
117
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
118
- <span class="ident">request</span><span class="punct">.</span><span class="ident">basic_auth</span><span class="punct">("</span><span class="string">username</span><span class="punct">",</span> <span class="punct">"</span><span class="string">password</span><span class="punct">")</span>
119
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
120
- <h3>Dealing with response objects</h3>
121
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
122
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
123
-
124
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
125
-
126
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
127
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
128
-
129
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
130
-
131
- <span class="ident">response</span><span class="punct">.</span><span class="ident">code</span> <span class="comment"># => 301</span>
132
- <span class="ident">response</span><span class="punct">.</span><span class="ident">body</span> <span class="comment"># => The body (HTML, XML, blob, whatever)</span>
133
- <span class="comment"># Headers are lowercased</span>
134
- <span class="ident">response</span><span class="punct">["</span><span class="string">cache-control</span><span class="punct">"]</span> <span class="comment"># => public, max-age=2592000</span></pre>
135
- <h3>POST form request</h3>
136
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
137
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
138
-
139
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://example.com/search</span><span class="punct">")</span>
140
-
141
- <span class="comment"># Shortcut</span>
142
- <span class="ident">response</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">post_form</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">,</span> <span class="punct">{"</span><span class="string">q</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">My query</span><span class="punct">",</span> <span class="punct">"</span><span class="string">per_page</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">50</span><span class="punct">"})</span>
143
-
144
- <span class="comment"># Full control</span>
145
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
146
-
147
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
148
- <span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">q</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">My query</span><span class="punct">",</span> <span class="punct">"</span><span class="string">per_page</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">50</span><span class="punct">"})</span>
149
-
150
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
151
- <h3>File upload - input type="file" style</h3>
152
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
153
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
154
-
155
- <span class="comment"># Token used to terminate the file in the post body. Make sure it is not</span>
156
- <span class="comment"># present in the file you're uploading.</span>
157
- <span class="constant">BOUNDARY</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">AaB03x</span><span class="punct">"</span>
158
-
159
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://something.com/uploads</span><span class="punct">")</span>
160
- <span class="ident">file</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">/path/to/your/testfile.txt</span><span class="punct">"</span>
161
-
162
- <span class="ident">post_body</span> <span class="punct">=</span> <span class="punct">[]</span>
163
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string">--<span class="expr">#{BOUNDARY}</span><span class="escape">rn</span></span><span class="punct">"</span>
164
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string">Content-Disposition: form-data; name=<span class="escape">"</span>datafile<span class="escape">"</span>; filename=<span class="escape">"</span><span class="expr">#{File.basename(file)}</span><span class="escape">"rn</span></span><span class="punct">"</span>
165
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string">Content-Type: text/plain<span class="escape">rn</span></span><span class="punct">"</span>
166
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string"><span class="escape">rn</span></span><span class="punct">"</span>
167
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span><span class="ident">file</span><span class="punct">)</span>
168
- <span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string"><span class="escape">rn</span>--<span class="expr">#{BOUNDARY}</span>--<span class="escape">rn</span></span><span class="punct">"</span>
169
-
170
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
171
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
172
- <span class="ident">request</span><span class="punct">.</span><span class="ident">body</span> <span class="punct">=</span> <span class="ident">post_body</span><span class="punct">.</span><span class="ident">join</span>
173
- <span class="ident">request</span><span class="punct">["</span><span class="string">Content-Type</span><span class="punct">"]</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">multipart/form-data, boundary=<span class="expr">#{BOUNDARY}</span></span><span class="punct">"</span>
174
-
175
- <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></span></span></span></span></span></span></pre>
176
- <h3>SSL/HTTPS request</h3>
177
- <p><strong>Update: There are some good reasons why this code example is bad. It introduces a potential security vulnerability if it's essential you use the server certificate to verify the identity of the server you're connecting to. There's <a href="http://www.rubyinside.com/how-to-cure-nethttps-risky-default-https-behavior-4010.html">a fix for the issue though!</a></strong></p>
178
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/https</span><span class="punct">"</span>
179
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
180
-
181
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">https://secure.com/</span><span class="punct">")</span>
182
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
183
- <span class="ident">http</span><span class="punct">.</span><span class="ident">use_ssl</span> <span class="punct">=</span> <span class="constant">true</span>
184
- <span class="ident">http</span><span class="punct">.</span><span class="ident">verify_mode</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">SSL</span><span class="punct">::</span><span class="constant">VERIFY_NONE</span>
185
-
186
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
187
-
188
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
189
- <span class="ident">response</span><span class="punct">.</span><span class="ident">body</span>
190
- <span class="ident">response</span><span class="punct">.</span><span class="ident">status</span>
191
- <span class="ident">response</span><span class="punct">["</span><span class="string">header-here</span><span class="punct">"]</span> <span class="comment"># All headers are lowercase</span></pre>
192
- <h3>SSL/HTTPS request with PEM certificate</h3>
193
- <pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/https</span><span class="punct">"</span>
194
- <span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
195
-
196
- <span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">https://secure.com/</span><span class="punct">")</span>
197
- <span class="ident">pem</span> <span class="punct">=</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">("</span><span class="string">/path/to/my.pem</span><span class="punct">")</span>
198
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
199
- <span class="ident">http</span><span class="punct">.</span><span class="ident">use_ssl</span> <span class="punct">=</span> <span class="constant">true</span>
200
- <span class="ident">http</span><span class="punct">.</span><span class="ident">cert</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">X509</span><span class="punct">::</span><span class="constant">Certificate</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">pem</span><span class="punct">)</span>
201
- <span class="ident">http</span><span class="punct">.</span><span class="ident">key</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">PKey</span><span class="punct">::</span><span class="constant">RSA</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">pem</span><span class="punct">)</span>
202
- <span class="ident">http</span><span class="punct">.</span><span class="ident">verify_mode</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">SSL</span><span class="punct">::</span><span class="constant">VERIFY_PEER</span>
203
-
204
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span></pre>
205
- <h3>REST methods</h3>
206
- <pre><span class="comment"># Basic REST.</span>
207
- <span class="comment"># Most REST APIs will set semantic values in response.body and response.code.</span>
208
- <span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
209
-
210
- <span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">api.restsite.com</span><span class="punct">")</span>
211
-
212
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users</span><span class="punct">")</span>
213
- <span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">users[login]</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">quentin</span><span class="punct">"})</span>
214
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
215
- <span class="comment"># Use nokogiri, hpricot, etc to parse response.body.</span>
216
-
217
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
218
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
219
- <span class="comment"># As with POST, the data is in response.body.</span>
220
-
221
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Put</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
222
- <span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">users[login]</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">changed</span><span class="punct">"})</span>
223
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
224
-
225
- <span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Delete</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
226
- <span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
227
- <p>There are more in August's repo if you want to keep browsing..</p>
228
- <p style="background-color: #ffc; font-weight: bold; font-size: 13px; color: #000;">Job: New Relic is <a href="http://ruby.jobamatic.com/a/jbb/job-details/165476">looking for a Ruby on Rails developer in Portland, Oregon.</a></p>
229
- </div>
230
-
231
-
232
-
233
-
234
- <!-- div style="margin-bottom: 8px; background-color: #ffc; text-align: center; padding: 6px"><a href="http://zfer.us/EKm97" style="text-decoration: none; margin: 0; padding: 0" rel="nofollow"><img src="http://www.rubyinside.com/images/railstutorial-box.gif" /></a></div -->
235
- <!-- <a href="http://www.rubyinside.com/19walkthrough/"><img src="http://www.rubyinside.com/images/19pro.gif" /></a> -->
236
-
237
-
238
-
239
-
240
-
241
- </div>
242
- </div>
243
-
244
- <div id="related"><h3>Related Posts</h3><ul><li><a href='http://www.rubyinside.com/cheat-sheet-for-rest-on-rails-261.html' rel='bookmark' title='Cheat Sheet for REST on Rails'>Cheat Sheet for REST on Rails</a></li>
245
- <li><a href='http://www.rubyinside.com/quick-ruby-reference-cheat-sheet-47.html' rel='bookmark' title='Quick Ruby Reference / Cheat Sheet'>Quick Ruby Reference / Cheat Sheet</a></li>
246
- <li><a href='http://www.rubyinside.com/ruby-on-rails-testing-cheat-sheet-206.html' rel='bookmark' title='Ruby on Rails Testing Cheat Sheet'>Ruby on Rails Testing Cheat Sheet</a></li>
247
- </ul></div>
248
- <div id="commentzone">
249
-
250
-
251
-
252
- <h3 id="comments-title">Comments</h3>
253
-
254
- <ol class="commentlist">
255
- <li class="comment even thread-even depth-1" id="comment-40542">
256
- <img alt='' src='http://0.gravatar.com/avatar/6268c7528d855f1cef5696a00d159909?s=64&amp;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&amp;r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://twitter.com/peterc' rel='external nofollow' class='url'>Peter Cooper</a> says:</cite><br />
257
-
258
- <small class="commentmetadata">January 16, 2010 at 1:02 am</small>
259
-
260
-
261
- <p>The idea for the hamster on rollerskates issue can be discovered by checking out its filename.. :-)</p>
262
-
263
-
264
- </li> <li class="comment odd alt thread-odd thread-alt depth-1" id="comment-40546">
265
- <img alt='' src='http://0.gravatar.com/avatar/aa31b79adedc3f60547769f1a8971ba6?s=64&amp;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&amp;r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://emmanueloga@gmail.com' rel='external nofollow' class='url'>Emmanuel</a> says:</cite><br />
266
-
267
- <small class="commentmetadata">January 16, 2010 at 3:50 pm</small>
268
-
269
-
270
- <p>Anybody knows if the HTTP::Net issues related with Timeout is still present in all (or any) ruby versions?</p>
271
- <p><a href="http://blog.headius.com/2008/02/rubys-threadraise-threadkill-timeoutrb.html" rel="nofollow">http://blog.headius.com/2008/02/rubys-threadraise-threadkill-timeoutrb.html</a></p>
272
-
273
-
274
- </li> <li class="comment even thread-even depth-1" id="comment-40550">
275
- <img alt='' src='http://0.gravatar.com/avatar/2bfc6436d28fc4a224e3ff1702a046d0?s=64&amp;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&amp;r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite>ratbeard says:</cite><br />
276
-
277
- <small class="commentmetadata">January 17, 2010 at 4:23 pm</small>
278
-
279
-
280
- <p>Another library that abstracts over Net::HTTP is Adam Wiggin's 'rest-client' library. It seems more targeted toward single requests (i.e. Restclient.get 'google.com') than mixing in and building a request class like HTTParty, though both libraries do variants of each style. It has a great interactive shell and request logging.</p>
281
- <p><a href="http://github.com/archiloque/rest-client/" rel="nofollow">http://github.com/archiloque/rest-client/</a></p>
282
- <p>I **highly** recommend another library authored by Mr. Wiggins that abstracts over the messy file system api, 'rush'. I'm really surprised that more libraries and apps that have a non-trivial amount of file system code don't use it.</p>
283
- <p><a href="http://rush.heroku.com/" rel="nofollow">http://rush.heroku.com/</a></p>
284
- <p>The code is very clean in both, I tip my hat to you Mr. Wiggins.</p>
285
-
286
-
287
- </li> <li class="comment odd alt thread-odd thread-alt depth-1" id="comment-40556">
288
- <img alt='' src='http://0.gravatar.com/avatar/e61f142f400df8299d37c2bce09e3478?s=64&amp;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&amp;r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://august.lilleaas.net/' rel='external nofollow' class='url'>August Lilleaas</a> says:</cite><br />
289
-
290
- <small class="commentmetadata">January 19, 2010 at 10:19 am</small>
291
-
292
-
293
- <p>I always use one of these libraries when I can (rest-client, httparty, ...). In some cases you'd be better off without dependencies, though, such as in small shell scripts, etc.</p>
294
-
295
-
296
- </li> </ol>
297
-
298
-
299
-
300
- </div>
301
-
302
- <h3>Other Posts to Enjoy</h3>
303
- <div class="widget_featured-posts noborder four">
304
- <ul class="clearfix"><li>
305
-
306
- <a href="http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2012/11/spaced.png&amp;h=73&amp;w=73&amp;zc=1" class="alignleft" alt="The Split is Not Enough: Unicode Whitespace Shenigans for Rubyists" /></a>
307
- <h4 class="featured-title"><a href="http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html">The Split is Not Enough: Unicode Whitespace Shenigans for Rubyists</a></h4>
308
- </li>
309
- <li>
310
-
311
- <a href="http://www.rubyinside.com/mega-february-2012-ruby-news-5815.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2012/03/mega.png&amp;h=73&amp;w=73&amp;zc=1" class="alignleft" alt="The Mega Ruby News and Release Roundup for February 2012" /></a>
312
- <h4 class="featured-title"><a href="http://www.rubyinside.com/mega-february-2012-ruby-news-5815.html">The Mega Ruby News and Release Roundup for February 2012</a></h4>
313
- </li>
314
- <li>
315
-
316
- <a href="http://www.rubyinside.com/sinatra-book-review-5704.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2011/12/sinatra-up.jpeg&amp;h=73&amp;w=73&amp;zc=1" class="alignleft" alt="A Lagom Review of O&#8217;Reilly&#8217;s &#8216;Sinatra Up and Running&#8217;" /></a>
317
- <h4 class="featured-title"><a href="http://www.rubyinside.com/sinatra-book-review-5704.html">A Lagom Review of O&#8217;Reilly&#8217;s &#8216;Sinatra Up and Running&#8217;</a></h4>
318
- </li>
319
- <li>
320
-
321
- <a href="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2011/11/unary.gif&amp;h=73&amp;w=73&amp;zc=1" class="alignleft" alt="Ruby&#8217;s Unary Operators and How to Define Their Functionality" /></a>
322
- <h4 class="featured-title"><a href="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html">Ruby&#8217;s Unary Operators and How to Define Their Functionality</a></h4>
323
- </li>
324
- </ul> </div>
325
-
326
-
327
-
328
-
329
-
330
- <h3>Twitter Mentions</h3>
331
- <div id="boastful"></div>
332
-
333
- <div class="previousnext">
334
- <div class="next"><a href="http://www.rubyinside.com/this-weeks-ruby-news-rspec-2-8-0-rc1-minitest-2-8-0-and-whats-new-in-bundler-1-1-5637.html" rel="next">Next Post &raquo;</a></div>
335
- <div class="previous"><a href="http://www.rubyinside.com/the-ruby-standard-library-to-be-converted-to-gems-for-ruby-2-0-5586.html" rel="prev">&laquo; Previous Post</a></div>
336
- </div>
337
-
338
- <!-- <h3>Want to get up to speed with Ruby 1.9?</h3>
339
-
340
- <p><a href="http://www.rubyinside.com/19walkthrough/"><img src="http://www.rubyinside.com/images/19pro.gif" /></a> </p>
341
- -->
342
-
343
-
344
- </div>
345
-
346
- </div>
347
-
348
- </div> <!-- inner -->
349
-
350
-
351
-
352
-
353
- </div> <!-- container -->
354
-
355
-
356
- <div id="footer">
357
- <div class="inner">
358
- <p>Copyright &copy; 2006&ndash;2012 <a href="http://twitter.com/peterc">Peter Cooper</a></p>
359
- </div>
360
- </div>
361
-
362
- <script type="text/javascript" src="http://engine.rubyrow.net/z/1313/adzerk1_4_16_19,adzerk2_4_16_19,adzerk3_4_16_19,adzerk4_4_16_19,adzerk5_4_16_19,adzerk6_4_16_19,adzerk7_4_16_19,adzerk8_4_16_19"></script>
363
- <script type="text/javascript" src="http://www.rubyinside.com/wp-content/themes/ri2011/jquery.boastful.js"></script>
364
-
365
- <script type="text/javascript">
366
- $(document).ready(function() {
367
- $('#boastful').boastful();
368
- });
369
- </script>
370
-
371
- <script type="text/javascript">
372
- var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
373
- document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
374
- </script>
375
- <script type="text/javascript">
376
- var pageTracker = _gat._getTracker("UA-2237791-3");
377
- pageTracker._initData();
378
- pageTracker._trackPageview();
379
- </script>
380
-
381
-
382
- </body>
383
- </html>
384
-
385
- <!-- div style="float:right"><a href="http://twitter.com/share" class="twitter-share-button" data-url="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html" data-counturl="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html" data-text="Ruby&#8217;s Unary Operators and How to Define Their Functionality" data-count="horizontal">Tweet</a></div -->
386
- <!-- div style="margin-bottom: -12px; margin-top: -10px"><iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fwww.rubyinside.com%2Frubys-unary-operators-and-how-to-redefine-their-functionality-5610.html&amp;layout=standard&amp;show_faces=false&amp;width=420&amp;action=like&amp;colorscheme=light&amp;height=26" scrolling="no" frameborder="0" style="border:none; overflow:hidden; width:420px; height:26px;" allowTransparency="true"></iframe></div -->
387
- <!-- div><script src="http://connect.facebook.net/en_US/all.js#xfbml=1"></script><fb:like href="http://x.com/" show_faces="false" width="450"></fb:like></div -->
388
-
389
-
390
-
391
- <!-- Dynamic page generated in 0.233 seconds. -->
392
- <!-- Cached page generated by WP-Super-Cache on 2013-01-04 01:31:41 -->
393
- <!-- super cache -->
@@ -1,36 +0,0 @@
1
- class Treat::Workers::Extractors::Similarity
2
- # Default options.
3
- DefaultOptions = {
4
- with: '',
5
- ins_cost: 1,
6
- del_cost: 1,
7
- sub_cost: 1
8
- }
9
- # Return the levensthein distance between
10
- # two strings taking into account the costs
11
- # of insertion, deletion, and substitution.
12
- # Used by did_you_mean? to detect typos.
13
- def self.similarity(entity, options)
14
- first, other = entity.to_s, options[:with].to_s
15
- options = DefaultOptions.merge(options)
16
- other, ins, del, sub, = options[:with],
17
- options[:inst_cost], options[:del_cost],
18
- options[:sub_cost]
19
- fill, dm = [0] * (first.length - 1).abs,
20
- [(0..first.length).collect { |i| i * ins}]
21
- for i in 1..other.length
22
- dm[i] = [i * del, fill.flatten]
23
- end
24
- for i in 1..other.length
25
- for j in 1..first.length
26
- dm[i][j] = [
27
- dm[i-1][j-1] + (first[i-1] ==
28
- other[i-1] ? 0 : sub), dm[i][j-1] +
29
- ins, dm[i-1][j] + del
30
- ].min
31
- end
32
- end
33
- dm[other.length][first.length]
34
- end
35
-
36
- end
data/spec/sandbox.rb DELETED
@@ -1,294 +0,0 @@
1
- # encoding: utf-8
2
- require_relative '../lib/treat'
3
-
4
- Treat.databases.mongo.db = 'treat_test'
5
- Treat.libraries.stanford.model_path =
6
- '/ruby/stanford-core-nlp-minimal/models/'
7
- Treat.libraries.stanford.jar_path =
8
- '/ruby/stanford-core-nlp-minimal/bin/'
9
- Treat.libraries.punkt.model_path =
10
- '/ruby/punkt/models/'
11
- Treat.libraries.reuters.model_path =
12
- '/ruby/reuters/models/'
13
-
14
- # include Treat::Core::DSL
15
-
16
- Treat::Builder.new do
17
- s = sentence "Hello, world!"
18
- s.print_tree
19
- end
20
-
21
- p = paragraph('A walk in the park. A trip on a boat.').segment
22
- p.visualize :dot, file: 'test.dot'
23
- =begin
24
-
25
- g = group("I was running")
26
- puts g.tag.inspect
27
-
28
- Treat.libraries.stanford.jar_path = '/ruby/treat/bin/'
29
- Treat.libraries.stanford.model_path = '/ruby/treat/models/'
30
-
31
- p = paragraph
32
- s = sentence
33
- w = word
34
-
35
- p = phrase 'hello world'
36
- e = email 'louis@gmail.com'
37
-
38
- d = question(:is_feature, :word)
39
- =end
40
- #d = document Treat.paths.spec + 'workers/examples/english/economist/hungarys_troubles.txt'
41
- #d.apply :chunk, :segment, :tokenize, :tag, :category, :name_tag
42
- #d.print_tree
43
- #d = document Treat.paths.spec + 'workers/examples/english/economist/saving_the_euro.odt'
44
- #d.print_tree
45
- =begin
46
- d = document 'test.htm'
47
- d.apply :chunk
48
- #d.serialize :yaml, file: 'test444.yaml'
49
- d.set :test, 2
50
- d.serialize :mongo, db: 'test'
51
- d.set :test, 3
52
- d.serialize :mongo, db: 'test'
53
- d.apply :segment, :tokenize, :tag, :category
54
- puts d.verb_count
55
- #d2 = document id: d.id, db: 'test'
56
- d2 = document 'features.test' => 3, db: 'test'
57
- d2.apply :segment, :tokenize, :tag, :category
58
- puts d2.verb_count
59
- #d.print_tree
60
- #s = document 'http://www.economist.com'
61
-
62
- p = phrase 'hello', 'world', '!'
63
- puts p.to_s
64
- puts p.to_str
65
- =end
66
-
67
- =begin
68
- ### Super basics.
69
- puts p.value
70
-
71
- p << 'bitch'
72
- p << word('hello')
73
- puts p.to_s
74
- puts p.to_str
75
- puts p.value
76
- puts p.to_ary.inspect
77
- =end
78
-
79
- =begin
80
-
81
- ### Configuration
82
-
83
- # A boolean value indicating whether to silence the output of external libraries (e.g. Stanford tools, Enju, LDA, Ruby-FANN) when they are used.
84
- puts Treat.core.verbosity.silence
85
- # A boolean value indicating whether to explain the steps that Treat is performing.
86
- puts Treat.core.verbosity.debug
87
- # A boolean value indicating whether Treat should try to detect the language of newly input text.
88
- puts Treat.core.language.detect
89
- # The language to default to when detection is off.
90
- puts Treat.core.language.default
91
- # A symbol representing the finest level at which language detection should be performed if language detection is turned on.
92
- puts Treat.core.language.detect_at
93
-
94
- # A directory in which to create temporary files.
95
- puts Treat.paths.tmp
96
- # A directory in which to store downloaded files.
97
- puts Treat.paths.files
98
- # A directory containing trained models for various tasks.
99
- puts Treat.paths.models
100
- # A directory containing the spec files.
101
- puts Treat.paths.spec
102
- # A directory containing executables and JAR files.
103
- puts Treat.paths.bin
104
- puts Treat.paths.lib
105
-
106
- # Set up Mongoid.
107
- Treat.databases.mongo.db = 'your_database'
108
- Treat.databases.mongo.host = 'localhost'
109
- Treat.databases.mongo.port = '27017'
110
-
111
- # Transparent string casting.
112
- s = 'inflection'.stem
113
- # is equivalent to
114
- s = 'inflection'.to_entity.stem
115
- # which comes down to
116
- s = word('inflection').stem
117
-
118
- # Transparent number casting.
119
- n = 2.ordinal
120
- # is equivalent to
121
- s = 2.to_entity.ordinal
122
- # which comes down to
123
- s = number(2).ordinal
124
- =end
125
- =begin
126
- ### BASIC USAGE
127
-
128
- # Create a sentence
129
- s = sentence 'Those who dream by day know of at least ' +
130
- '19 things that escape those who dream only at night.'
131
-
132
- # Tokenize and tag it.
133
- s.tokenize.tag
134
-
135
- # View the sentence structure.
136
- s.print_tree
137
-
138
- # Iterate over the tokens.
139
- s.each_token do |tok|
140
- puts tok.value
141
- puts tok.type
142
- end
143
-
144
-
145
-
146
- # Arrays instead of iterators.
147
- (s.nouns + s.adjectives).each do |word|
148
- puts word.synonyms
149
- puts word.antonyms
150
- end
151
-
152
- # Functions on numbers.
153
- s.each_number do |num|
154
- puts num.ordinal
155
- puts num.cardinal
156
- end
157
-
158
- # See all the annotations.
159
- s.each do |tok|
160
- puts tok.inspect
161
- end
162
-
163
- # Lazy way of doing all of the above.
164
- s = sentence 'Those who dream by day know of at least ' +
165
- '19 things that escape those who dream only at night.'
166
-
167
- s.apply :tokenize, :tag, :category,
168
- :stem, :hyponyms, :hypernyms,
169
- :antonyms, :ordinal, :cardinal
170
-
171
- =end
172
-
173
- =begin
174
- ### A BIT MORE ADVANCED USAGE
175
-
176
- section = section "Obama-Sarkozy Meeting\n" +
177
- "Obama and Sarkozy met on January 1st to investigate " +
178
- "the possibility of a new rescue plan. President " +
179
- "Sarkozy is to meet Merkel next Tuesday in Berlin."
180
-
181
- # Chunk: split the titles and paragraphs.
182
- # Segment: perform sentence segmentation.
183
- # Parse: parse the syntax of each sentence.
184
- section.apply :chunk, :segment, :parse
185
-
186
- # View the tree structure.
187
- section.print_tree
188
-
189
- # Get some basic info on the text.
190
- puts section.title
191
- puts section.sentence_count
192
- puts section.word_count
193
-
194
- section.apply :category
195
- puts section.noun_count
196
- puts section.frequency_of 'president'
197
-
198
- section.each_phrase_with_tag('NP') do |phrase|
199
- puts phrase.to_s
200
- end
201
-
202
- =end
203
- =begin
204
- ### URL documents, XML serialization.
205
-
206
- urls = ['http://www.cbc.ca/news/world/story/2012/11/25/snc-lavalin-ben-aissa-charges.html',
207
- 'http://www.cbc.ca/news/world/story/2012/11/25/egypt.html', 'http://www.cbc.ca/news/canada/prince-edward-island/story/2012/11/25/pei-murder-arrest-stlucia.html', 'http://www.cbc.ca/news/world/story/2012/11/25/bangladesh-garment-factory-fire.html']
208
-
209
- c = collection
210
- urls.each { |url| c << document(url) }
211
-
212
- # View the collection.
213
- c.print_tree
214
-
215
- c.apply :chunk, :segment, :tokenize
216
- c.serialize :xml, :file => 'test.xml'
217
-
218
- # Reopen the collection.
219
- c = collection 'test.xml'
220
-
221
- # View it again.
222
- c.print_tree
223
- =end
224
- =begin
225
- include Treat::Core::DSL
226
-
227
- # Show progress bars for download.
228
- Treat.core.verbosity.silence = false
229
- # Explain what Treat is doing.
230
- Treat.core.verbosity.debug = true
231
-
232
- # Define the question "is it junk?" on sentences.
233
- qn = question(:is_junk, :sentence)
234
-
235
- # Frame the problem as depending on punctuation
236
- # count and word count for each sentence.
237
- pb = problem(qn,
238
- feature(:punctuation_count),
239
- feature(:word_count) )
240
-
241
- # Get some web documents to work on.
242
- url1 = 'http://en.wikipedia.org/wiki/NOD_mouse'
243
- url2 = 'http://en.wikipedia.org/wiki/Academic_studies_about_Wikipedia'
244
- d1, d2 = document(url1), document(url2)
245
-
246
- # Process both of our documents.
247
- [d1,d2].apply(:chunk, :segment, :tokenize)
248
-
249
- # Answer our problem to create a training set.
250
- d1.sentences[0..17].each { |s| s.set :is_junk, 0 }
251
- d1.sentences[17..-1].each { |s| s.set :is_junk, 1 }
252
- d_set = d1.export(pb)
253
-
254
- # Define our gold standard results for evaluation.
255
- d2.sentences[0..81].each { |s| s.set :is_true_junk, 0 }
256
- d2.sentences[81..-1].each { |s| s.set :is_true_junk, 1 }
257
-
258
- tp, fp, tn, fn = 0.0, 0.0, 0.0, 0.0
259
-
260
- d2.sentences.map do |s|
261
- pred = s.classify(:id3, training: d_set)
262
- if pred == 1
263
- tp += 1 if s.is_true_junk == 1
264
- fp += 1 if s.is_true_junk == 0
265
- else
266
- tn += 1 if s.is_true_junk == 0
267
- fn += 1 if s.is_true_junk == 1
268
- end
269
- end
270
-
271
- puts "Precision: #{tp/(tp + fp)}"
272
- puts "Recall: #{tp/(tp + fn)}"
273
- =end
274
- =begin
275
- d = document 'http://louismullie.com/susan-text-scan1.jpg'
276
- d.apply :chunk, :segment, :tokenize
277
- d.print_tree
278
- =end
279
- =begin
280
- # Syntax example
281
- phra = phrase 'Obama', 'Sarkozy', 'Meeting'
282
-
283
- para = paragraph 'Obama and Sarkozy met on January 1st to'
284
- 'investigate the possibility of a new rescue plan. Nicolas ' +
285
- 'Sarkozy is to meet Merkel next Tuesday in Berlin.'
286
-
287
- sect = section title(phra), para
288
- =end
289
- =begin
290
- puts "beer".plural.inspect
291
- =end
292
- # Treat.core.language.detect = true
293
- # s = sentence "Du hast deiner Frau einen roten Ring gekauft."
294
- #s.apply(:parse,:category).print_tree