treat 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/treat/config/data/languages/agnostic.rb +6 -3
- data/lib/treat/config/data/languages/english.rb +1 -1
- data/lib/treat/config/data/workers/extractors.rb +8 -0
- data/lib/treat/loaders/stanford.rb +2 -0
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/extractors/distance/levenshtein.rb +35 -0
- data/lib/treat/workers/extractors/name_tag/stanford.rb +4 -1
- data/lib/treat/workers/extractors/similarity/jaro_winkler.rb +38 -0
- data/lib/treat/workers/extractors/similarity/tf_idf.rb +19 -3
- data/lib/treat/workers/extractors/time/chronic.rb +6 -41
- data/lib/treat/workers/extractors/time/kronic.rb +20 -0
- data/lib/treat/workers/extractors/time/nickel.rb +0 -15
- data/lib/treat/workers/extractors/time/ruby.rb +2 -33
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +11 -10
- data/lib/treat/workers/processors/parsers/stanford.rb +60 -112
- data/spec/entities/collection.rb +29 -25
- data/spec/entities/document.rb +45 -44
- data/spec/entities/entity.rb +295 -294
- data/spec/entities/phrase.rb +21 -17
- data/spec/entities/token.rb +43 -40
- data/spec/entities/word.rb +5 -1
- data/spec/entities/zone.rb +26 -22
- data/spec/helper.rb +7 -2
- data/spec/learning/data_set.rb +145 -141
- data/spec/learning/export.rb +46 -42
- data/spec/learning/problem.rb +114 -110
- data/spec/learning/question.rb +46 -42
- data/spec/treat.rb +41 -37
- data/spec/workers/agnostic.rb +2 -2
- data/spec/workers/english.rb +12 -12
- metadata +7 -8
- data/files/21552208.html +0 -786
- data/files/nethttp-cheat-sheet-2940.html +0 -393
- data/lib/treat/workers/extractors/similarity/levenshtein.rb +0 -36
- data/spec/sandbox.rb +0 -294
- data/spec/workers/examples/english/mathematicians/euler.html +0 -21
@@ -1,393 +0,0 @@
|
|
1
|
-
<!doctype html>
|
2
|
-
|
3
|
-
<!--[if lt IE 7 ]> <html lang="en" class="ie6"> <![endif]-->
|
4
|
-
<!--[if IE 7 ]> <html lang="en" class="ie7"> <![endif]-->
|
5
|
-
<!--[if IE 8 ]> <html lang="en" class="ie8"> <![endif]-->
|
6
|
-
<!--[if IE 9 ]> <html lang="en" class="ie9"> <![endif]-->
|
7
|
-
<!--[if (gt IE 9)|!(IE)]><!--> <html lang="en" class="no-js"> <!--<![endif]-->
|
8
|
-
|
9
|
-
<head>
|
10
|
-
<meta charset="utf-8">
|
11
|
-
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
|
12
|
-
<title>Net::HTTP Cheat Sheet</title>
|
13
|
-
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
14
|
-
<link rel="icon" href="/favicon.png" type="image/png">
|
15
|
-
<link rel="shortcut icon" href="/favicon.png" type="image/png">
|
16
|
-
<link rel="alternate" type="application/rss+xml" title="Ruby Inside" href="http://www.rubyinside.com/feed/" />
|
17
|
-
<link rel="stylesheet" href="http://www.rubyinside.com/wp-content/themes/ri2011/css/ri.css">
|
18
|
-
<!-- script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.4.1/jquery.min.js"></script -->
|
19
|
-
<script type="text/javascript" src="http://use.typekit.com/dmj7czx.js"></script>
|
20
|
-
<script type="text/javascript">try{Typekit.load();}catch(e){}</script>
|
21
|
-
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.7.0/jquery.min.js"></script>
|
22
|
-
|
23
|
-
<!--[if lt IE 9]>
|
24
|
-
<script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script>
|
25
|
-
<![endif]-->
|
26
|
-
</head>
|
27
|
-
|
28
|
-
<body class="single single-post postid-2940 single-format-standard">
|
29
|
-
<div id="superheader">Want to stay on top? <a href="http://rubyweekly.com/?m">Ruby Weekly</a> is a once-weekly e-mail newsletter covering the latest Ruby and Rails news.</div>
|
30
|
-
<div id="container">
|
31
|
-
<div class="outerheader top">
|
32
|
-
<div class="right"><form method="get" id="searchform" action="/"><input type="text" value="" name="s" id="s" size="24" /><input type="submit" id="searchsubmit" value="Search" /></form> <a href="http://www.rubyinside.com/feed/"><img src="http://feeds2.feedburner.com/~fc/RubyInside?bg=99CCFF&fg=333333&anim=0" align="top" alt="Feed Icon" /></a></div>
|
33
|
-
<a href="/"><img src="http://www.rubyinside.com/wp-content/themes/ri2011/css/logo.png" id="logo" alt="Ruby Inside - A Ruby Blog" /></a>
|
34
|
-
</div>
|
35
|
-
|
36
|
-
<div class="outerheader masthead">
|
37
|
-
<div class="menu">
|
38
|
-
<ul>
|
39
|
-
<li><a href="/" class="home">Home</a></li> <li><a href="/about/">About</a></li>
|
40
|
-
<li><a href="/archives/">Archives</a></li>
|
41
|
-
<!-- li><a href="/advertise/">Advertising</a></li -->
|
42
|
-
<li><a href="http://jobs.rubyinside.com/">Ruby and Rails Jobs</a></li>
|
43
|
-
<li class="right"><a href="/feed/">RSS</a> <!-- span class="count">(24146)</span --></li>
|
44
|
-
<li class="right">
|
45
|
-
</li>
|
46
|
-
<li class="right"><a href="http://twitter.com/rubyinside">Follow us on Twitter</a> <!-- span class="count">(5548)</span --></li>
|
47
|
-
</ul>
|
48
|
-
</div>
|
49
|
-
|
50
|
-
|
51
|
-
</div>
|
52
|
-
|
53
|
-
<div id="innercontainer"><div id="page">
|
54
|
-
|
55
|
-
|
56
|
-
<div id="sidebar">
|
57
|
-
<div style="margin-top: 12px"><a href="http://twitter.com/RubyInside" class="twitter-follow-button">Follow @RubyInside</a>
|
58
|
-
<script src="http://platform.twitter.com/widgets.js" type="text/javascript"></script></div>
|
59
|
-
<div id="execphp-3" class="widget-container section widget_execphp">
|
60
|
-
<div class="execphpwidget"> <h3><a href="http://jobs.rubyinside.com/">Ruby and Rails Jobs</a></h3>
|
61
|
-
<div class="inner">
|
62
|
-
<ul>
|
63
|
-
<!-- ? readfile('http://www.rubyinside.com/jobs.html'); ? -->
|
64
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/778380">Front-End Engineer</a></div><div class='company'>New Relic</div><div class="location">San Francisco, California</div></li>
|
65
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/780028">Ruby on Rails Developer @WeedMaps</a></div><div class='company'>WeedMaps</div><div class="location">Denver, Colorado</div></li>
|
66
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/766252">Ruby on Rails Developer</a></div><div class='company'>CSD</div><div class="location">Austin, Texas</div></li>
|
67
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/776046">Front-End Developer</a></div><div class='company'>Bedrocket Media Ventures</div><div class="location">New York, New York</div></li>
|
68
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/769030">Telecommute-Principal Ruby Consultant</a></div><div class='company'>IMPRTL Inc</div><div class="location">Chicago, Illinois</div></li>
|
69
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/772705">Senior Rails Developer</a></div><div class='company'>Lightspeed Systems</div><div class="location">Austin, Texas</div></li>
|
70
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/776045">Lead Software Developer</a></div><div class='company'>Bedrocket Media Ventures</div><div class="location">New York, New York</div></li>
|
71
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/771110">Gifted RoR Software Engineer Technologist</a></div><div class='company'>Chelsmore Apartments</div><div class="location">New York, New York</div></li>
|
72
|
-
<div class="minor"><a href="http://ruby.jobamatic.com/">More jobs »</a> : <a href="/post-a-job">Post a Job</a></div>
|
73
|
-
</div>
|
74
|
-
</div>
|
75
|
-
</div><div id="text-3" class="widget-container section widget_text"> <div class="textwidget"><!-- a href="http://owningrails.com/?ref=5905208113"><img src="http://owningrails.com/images/ad-with.png" /></a --><!-- a href="http://rubyweekly.com/"><img src="http://rubyinside.com/wp-content/themes/ri20102/images/rw-ri-box.gif" /></a --><!-- a href="https://cooperpress.com/19walkthrough"><img src="/images/ruby19wt2.gif" /></a --><!-- a href="https://cooperpress.com/rubyreloaded"><img src="http://rubyinside.com/images/reloaded-riad.png" /></a --></div>
|
76
|
-
</div>
|
77
|
-
|
78
|
-
<!-- if (is_front_page() && !is_paged()) { -->
|
79
|
-
</div>
|
80
|
-
|
81
|
-
<div id="main">
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
<div class="post-2940 post type-post status-publish format-standard hentry category-reference" id="post-2940">
|
86
|
-
|
87
|
-
<div class="content">
|
88
|
-
|
89
|
-
<div class="title"><h2>Net::HTTP Cheat Sheet</h2></div>
|
90
|
-
|
91
|
-
<p class="author">By <a href="http://www.rubyinside.com/author/admin" title="View all posts by Peter Cooper">Peter Cooper</a> <span class="date">/ January 16, 2010</span></p>
|
92
|
-
<div id="thecontent"><p><img src="http://www.rubyinside.com/wp-content/uploads/2010/01/http-is-a-hamster-on-rollerblades-says-marc-andre-cournoyer.gif" width="120" height="120" alt="http-is-a-hamster-on-rollerblades-says-marc-andre-cournoyer.gif" style="float:left; margin-right:12px; margin-bottom:12px; border:1px #000000 solid;" />Norwegian Rubyist <a href="http://august.lilleaas.net/">August Lilleaas</a> has been busy putting together <a href="http://github.com/augustl/net-http-cheat-sheet">a ton of examples</a> of using the <a href="http://ruby-doc.org/stdlib/libdoc/net/http/rdoc/index.html">Net::HTTP</a> Ruby library that comes with most Ruby distributions. I asked him if it'd be okay to put some of them directly on Ruby Inside for reference purposes and he said "No problem!"</p>
|
93
|
-
<p>It's worth noting that Net::HTTP has been superseded in many areas by libraries like John Nunemaker's <a href="http://github.com/jnunemaker/httparty">HTTParty</a> and Paul DIx's high performance <a href="http://github.com/pauldix/typhoeus">Typhoeus</a>, but as part of the standard library, Net::HTTP is still a popular option though it doesn't have the easiest API to remember.</p>
|
94
|
-
<p>Here's a selection of August's examples for some of the most common operations. Want to see <i>all</i> of the examples and follow any updates made to them? Check out August's <a href="http://github.com/augustl/net-http-cheat-sheet">net-http-cheat-sheet GitHub repo</a>.</p>
|
95
|
-
<h3>Standard HTTP Request</h3>
|
96
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
97
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
98
|
-
|
99
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
|
100
|
-
|
101
|
-
<span class="comment"># Shortcut</span>
|
102
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">get_response</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">)</span>
|
103
|
-
|
104
|
-
<span class="comment"># Will print response.body</span>
|
105
|
-
<span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">get_print</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">)</span>
|
106
|
-
|
107
|
-
<span class="comment"># Full</span>
|
108
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
109
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">))</span></pre>
|
110
|
-
<h3>Basic Auth</h3>
|
111
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
112
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
113
|
-
|
114
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
|
115
|
-
|
116
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
117
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
|
118
|
-
<span class="ident">request</span><span class="punct">.</span><span class="ident">basic_auth</span><span class="punct">("</span><span class="string">username</span><span class="punct">",</span> <span class="punct">"</span><span class="string">password</span><span class="punct">")</span>
|
119
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
|
120
|
-
<h3>Dealing with response objects</h3>
|
121
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
122
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
123
|
-
|
124
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
|
125
|
-
|
126
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
127
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
|
128
|
-
|
129
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
|
130
|
-
|
131
|
-
<span class="ident">response</span><span class="punct">.</span><span class="ident">code</span> <span class="comment"># => 301</span>
|
132
|
-
<span class="ident">response</span><span class="punct">.</span><span class="ident">body</span> <span class="comment"># => The body (HTML, XML, blob, whatever)</span>
|
133
|
-
<span class="comment"># Headers are lowercased</span>
|
134
|
-
<span class="ident">response</span><span class="punct">["</span><span class="string">cache-control</span><span class="punct">"]</span> <span class="comment"># => public, max-age=2592000</span></pre>
|
135
|
-
<h3>POST form request</h3>
|
136
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
137
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
138
|
-
|
139
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://example.com/search</span><span class="punct">")</span>
|
140
|
-
|
141
|
-
<span class="comment"># Shortcut</span>
|
142
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">post_form</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">,</span> <span class="punct">{"</span><span class="string">q</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">My query</span><span class="punct">",</span> <span class="punct">"</span><span class="string">per_page</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">50</span><span class="punct">"})</span>
|
143
|
-
|
144
|
-
<span class="comment"># Full control</span>
|
145
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
146
|
-
|
147
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
|
148
|
-
<span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">q</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">My query</span><span class="punct">",</span> <span class="punct">"</span><span class="string">per_page</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">50</span><span class="punct">"})</span>
|
149
|
-
|
150
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
|
151
|
-
<h3>File upload - input type="file" style</h3>
|
152
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
153
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
154
|
-
|
155
|
-
<span class="comment"># Token used to terminate the file in the post body. Make sure it is not</span>
|
156
|
-
<span class="comment"># present in the file you're uploading.</span>
|
157
|
-
<span class="constant">BOUNDARY</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">AaB03x</span><span class="punct">"</span>
|
158
|
-
|
159
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://something.com/uploads</span><span class="punct">")</span>
|
160
|
-
<span class="ident">file</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">/path/to/your/testfile.txt</span><span class="punct">"</span>
|
161
|
-
|
162
|
-
<span class="ident">post_body</span> <span class="punct">=</span> <span class="punct">[]</span>
|
163
|
-
<span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string">--<span class="expr">#{BOUNDARY}</span><span class="escape">rn</span></span><span class="punct">"</span>
|
164
|
-
<span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string">Content-Disposition: form-data; name=<span class="escape">"</span>datafile<span class="escape">"</span>; filename=<span class="escape">"</span><span class="expr">#{File.basename(file)}</span><span class="escape">"rn</span></span><span class="punct">"</span>
|
165
|
-
<span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string">Content-Type: text/plain<span class="escape">rn</span></span><span class="punct">"</span>
|
166
|
-
<span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string"><span class="escape">rn</span></span><span class="punct">"</span>
|
167
|
-
<span class="ident">post_body</span> <span class="punct">< <</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span><span class="ident">file</span><span class="punct">)</span>
|
168
|
-
<span class="ident">post_body</span> <span class="punct">< <</span> <span class="punct">"</span><span class="string"><span class="escape">rn</span>--<span class="expr">#{BOUNDARY}</span>--<span class="escape">rn</span></span><span class="punct">"</span>
|
169
|
-
|
170
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
171
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
|
172
|
-
<span class="ident">request</span><span class="punct">.</span><span class="ident">body</span> <span class="punct">=</span> <span class="ident">post_body</span><span class="punct">.</span><span class="ident">join</span>
|
173
|
-
<span class="ident">request</span><span class="punct">["</span><span class="string">Content-Type</span><span class="punct">"]</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">multipart/form-data, boundary=<span class="expr">#{BOUNDARY}</span></span><span class="punct">"</span>
|
174
|
-
|
175
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></span></span></span></span></span></span></pre>
|
176
|
-
<h3>SSL/HTTPS request</h3>
|
177
|
-
<p><strong>Update: There are some good reasons why this code example is bad. It introduces a potential security vulnerability if it's essential you use the server certificate to verify the identity of the server you're connecting to. There's <a href="http://www.rubyinside.com/how-to-cure-nethttps-risky-default-https-behavior-4010.html">a fix for the issue though!</a></strong></p>
|
178
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/https</span><span class="punct">"</span>
|
179
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
180
|
-
|
181
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">https://secure.com/</span><span class="punct">")</span>
|
182
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
183
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">use_ssl</span> <span class="punct">=</span> <span class="constant">true</span>
|
184
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">verify_mode</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">SSL</span><span class="punct">::</span><span class="constant">VERIFY_NONE</span>
|
185
|
-
|
186
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
|
187
|
-
|
188
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
|
189
|
-
<span class="ident">response</span><span class="punct">.</span><span class="ident">body</span>
|
190
|
-
<span class="ident">response</span><span class="punct">.</span><span class="ident">status</span>
|
191
|
-
<span class="ident">response</span><span class="punct">["</span><span class="string">header-here</span><span class="punct">"]</span> <span class="comment"># All headers are lowercase</span></pre>
|
192
|
-
<h3>SSL/HTTPS request with PEM certificate</h3>
|
193
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/https</span><span class="punct">"</span>
|
194
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
195
|
-
|
196
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">https://secure.com/</span><span class="punct">")</span>
|
197
|
-
<span class="ident">pem</span> <span class="punct">=</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">("</span><span class="string">/path/to/my.pem</span><span class="punct">")</span>
|
198
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
199
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">use_ssl</span> <span class="punct">=</span> <span class="constant">true</span>
|
200
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">cert</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">X509</span><span class="punct">::</span><span class="constant">Certificate</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">pem</span><span class="punct">)</span>
|
201
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">key</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">PKey</span><span class="punct">::</span><span class="constant">RSA</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">pem</span><span class="punct">)</span>
|
202
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">verify_mode</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">SSL</span><span class="punct">::</span><span class="constant">VERIFY_PEER</span>
|
203
|
-
|
204
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span></pre>
|
205
|
-
<h3>REST methods</h3>
|
206
|
-
<pre><span class="comment"># Basic REST.</span>
|
207
|
-
<span class="comment"># Most REST APIs will set semantic values in response.body and response.code.</span>
|
208
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
209
|
-
|
210
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">api.restsite.com</span><span class="punct">")</span>
|
211
|
-
|
212
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users</span><span class="punct">")</span>
|
213
|
-
<span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">users[login]</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">quentin</span><span class="punct">"})</span>
|
214
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
|
215
|
-
<span class="comment"># Use nokogiri, hpricot, etc to parse response.body.</span>
|
216
|
-
|
217
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
|
218
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
|
219
|
-
<span class="comment"># As with POST, the data is in response.body.</span>
|
220
|
-
|
221
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Put</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
|
222
|
-
<span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">users[login]</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">changed</span><span class="punct">"})</span>
|
223
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
|
224
|
-
|
225
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Delete</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
|
226
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
|
227
|
-
<p>There are more in August's repo if you want to keep browsing..</p>
|
228
|
-
<p style="background-color: #ffc; font-weight: bold; font-size: 13px; color: #000;">Job: New Relic is <a href="http://ruby.jobamatic.com/a/jbb/job-details/165476">looking for a Ruby on Rails developer in Portland, Oregon.</a></p>
|
229
|
-
</div>
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
<!-- div style="margin-bottom: 8px; background-color: #ffc; text-align: center; padding: 6px"><a href="http://zfer.us/EKm97" style="text-decoration: none; margin: 0; padding: 0" rel="nofollow"><img src="http://www.rubyinside.com/images/railstutorial-box.gif" /></a></div -->
|
235
|
-
<!-- <a href="http://www.rubyinside.com/19walkthrough/"><img src="http://www.rubyinside.com/images/19pro.gif" /></a> -->
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
</div>
|
242
|
-
</div>
|
243
|
-
|
244
|
-
<div id="related"><h3>Related Posts</h3><ul><li><a href='http://www.rubyinside.com/cheat-sheet-for-rest-on-rails-261.html' rel='bookmark' title='Cheat Sheet for REST on Rails'>Cheat Sheet for REST on Rails</a></li>
|
245
|
-
<li><a href='http://www.rubyinside.com/quick-ruby-reference-cheat-sheet-47.html' rel='bookmark' title='Quick Ruby Reference / Cheat Sheet'>Quick Ruby Reference / Cheat Sheet</a></li>
|
246
|
-
<li><a href='http://www.rubyinside.com/ruby-on-rails-testing-cheat-sheet-206.html' rel='bookmark' title='Ruby on Rails Testing Cheat Sheet'>Ruby on Rails Testing Cheat Sheet</a></li>
|
247
|
-
</ul></div>
|
248
|
-
<div id="commentzone">
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
<h3 id="comments-title">Comments</h3>
|
253
|
-
|
254
|
-
<ol class="commentlist">
|
255
|
-
<li class="comment even thread-even depth-1" id="comment-40542">
|
256
|
-
<img alt='' src='http://0.gravatar.com/avatar/6268c7528d855f1cef5696a00d159909?s=64&d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://twitter.com/peterc' rel='external nofollow' class='url'>Peter Cooper</a> says:</cite><br />
|
257
|
-
|
258
|
-
<small class="commentmetadata">January 16, 2010 at 1:02 am</small>
|
259
|
-
|
260
|
-
|
261
|
-
<p>The idea for the hamster on rollerskates issue can be discovered by checking out its filename.. :-)</p>
|
262
|
-
|
263
|
-
|
264
|
-
</li> <li class="comment odd alt thread-odd thread-alt depth-1" id="comment-40546">
|
265
|
-
<img alt='' src='http://0.gravatar.com/avatar/aa31b79adedc3f60547769f1a8971ba6?s=64&d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://emmanueloga@gmail.com' rel='external nofollow' class='url'>Emmanuel</a> says:</cite><br />
|
266
|
-
|
267
|
-
<small class="commentmetadata">January 16, 2010 at 3:50 pm</small>
|
268
|
-
|
269
|
-
|
270
|
-
<p>Anybody knows if the HTTP::Net issues related with Timeout is still present in all (or any) ruby versions?</p>
|
271
|
-
<p><a href="http://blog.headius.com/2008/02/rubys-threadraise-threadkill-timeoutrb.html" rel="nofollow">http://blog.headius.com/2008/02/rubys-threadraise-threadkill-timeoutrb.html</a></p>
|
272
|
-
|
273
|
-
|
274
|
-
</li> <li class="comment even thread-even depth-1" id="comment-40550">
|
275
|
-
<img alt='' src='http://0.gravatar.com/avatar/2bfc6436d28fc4a224e3ff1702a046d0?s=64&d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite>ratbeard says:</cite><br />
|
276
|
-
|
277
|
-
<small class="commentmetadata">January 17, 2010 at 4:23 pm</small>
|
278
|
-
|
279
|
-
|
280
|
-
<p>Another library that abstracts over Net::HTTP is Adam Wiggin's 'rest-client' library. It seems more targeted toward single requests (i.e. Restclient.get 'google.com') than mixing in and building a request class like HTTParty, though both libraries do variants of each style. It has a great interactive shell and request logging.</p>
|
281
|
-
<p><a href="http://github.com/archiloque/rest-client/" rel="nofollow">http://github.com/archiloque/rest-client/</a></p>
|
282
|
-
<p>I **highly** recommend another library authored by Mr. Wiggins that abstracts over the messy file system api, 'rush'. I'm really surprised that more libraries and apps that have a non-trivial amount of file system code don't use it.</p>
|
283
|
-
<p><a href="http://rush.heroku.com/" rel="nofollow">http://rush.heroku.com/</a></p>
|
284
|
-
<p>The code is very clean in both, I tip my hat to you Mr. Wiggins.</p>
|
285
|
-
|
286
|
-
|
287
|
-
</li> <li class="comment odd alt thread-odd thread-alt depth-1" id="comment-40556">
|
288
|
-
<img alt='' src='http://0.gravatar.com/avatar/e61f142f400df8299d37c2bce09e3478?s=64&d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://august.lilleaas.net/' rel='external nofollow' class='url'>August Lilleaas</a> says:</cite><br />
|
289
|
-
|
290
|
-
<small class="commentmetadata">January 19, 2010 at 10:19 am</small>
|
291
|
-
|
292
|
-
|
293
|
-
<p>I always use one of these libraries when I can (rest-client, httparty, ...). In some cases you'd be better off without dependencies, though, such as in small shell scripts, etc.</p>
|
294
|
-
|
295
|
-
|
296
|
-
</li> </ol>
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
</div>
|
301
|
-
|
302
|
-
<h3>Other Posts to Enjoy</h3>
|
303
|
-
<div class="widget_featured-posts noborder four">
|
304
|
-
<ul class="clearfix"><li>
|
305
|
-
|
306
|
-
<a href="http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2012/11/spaced.png&h=73&w=73&zc=1" class="alignleft" alt="The Split is Not Enough: Unicode Whitespace Shenigans for Rubyists" /></a>
|
307
|
-
<h4 class="featured-title"><a href="http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html">The Split is Not Enough: Unicode Whitespace Shenigans for Rubyists</a></h4>
|
308
|
-
</li>
|
309
|
-
<li>
|
310
|
-
|
311
|
-
<a href="http://www.rubyinside.com/mega-february-2012-ruby-news-5815.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2012/03/mega.png&h=73&w=73&zc=1" class="alignleft" alt="The Mega Ruby News and Release Roundup for February 2012" /></a>
|
312
|
-
<h4 class="featured-title"><a href="http://www.rubyinside.com/mega-february-2012-ruby-news-5815.html">The Mega Ruby News and Release Roundup for February 2012</a></h4>
|
313
|
-
</li>
|
314
|
-
<li>
|
315
|
-
|
316
|
-
<a href="http://www.rubyinside.com/sinatra-book-review-5704.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2011/12/sinatra-up.jpeg&h=73&w=73&zc=1" class="alignleft" alt="A Lagom Review of O’Reilly’s ‘Sinatra Up and Running’" /></a>
|
317
|
-
<h4 class="featured-title"><a href="http://www.rubyinside.com/sinatra-book-review-5704.html">A Lagom Review of O’Reilly’s ‘Sinatra Up and Running’</a></h4>
|
318
|
-
</li>
|
319
|
-
<li>
|
320
|
-
|
321
|
-
<a href="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2011/11/unary.gif&h=73&w=73&zc=1" class="alignleft" alt="Ruby’s Unary Operators and How to Define Their Functionality" /></a>
|
322
|
-
<h4 class="featured-title"><a href="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html">Ruby’s Unary Operators and How to Define Their Functionality</a></h4>
|
323
|
-
</li>
|
324
|
-
</ul> </div>
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
<h3>Twitter Mentions</h3>
|
331
|
-
<div id="boastful"></div>
|
332
|
-
|
333
|
-
<div class="previousnext">
|
334
|
-
<div class="next"><a href="http://www.rubyinside.com/this-weeks-ruby-news-rspec-2-8-0-rc1-minitest-2-8-0-and-whats-new-in-bundler-1-1-5637.html" rel="next">Next Post »</a></div>
|
335
|
-
<div class="previous"><a href="http://www.rubyinside.com/the-ruby-standard-library-to-be-converted-to-gems-for-ruby-2-0-5586.html" rel="prev">« Previous Post</a></div>
|
336
|
-
</div>
|
337
|
-
|
338
|
-
<!-- <h3>Want to get up to speed with Ruby 1.9?</h3>
|
339
|
-
|
340
|
-
<p><a href="http://www.rubyinside.com/19walkthrough/"><img src="http://www.rubyinside.com/images/19pro.gif" /></a> </p>
|
341
|
-
-->
|
342
|
-
|
343
|
-
|
344
|
-
</div>
|
345
|
-
|
346
|
-
</div>
|
347
|
-
|
348
|
-
</div> <!-- inner -->
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
</div> <!-- container -->
|
354
|
-
|
355
|
-
|
356
|
-
<div id="footer">
|
357
|
-
<div class="inner">
|
358
|
-
<p>Copyright © 2006–2012 <a href="http://twitter.com/peterc">Peter Cooper</a></p>
|
359
|
-
</div>
|
360
|
-
</div>
|
361
|
-
|
362
|
-
<script type="text/javascript" src="http://engine.rubyrow.net/z/1313/adzerk1_4_16_19,adzerk2_4_16_19,adzerk3_4_16_19,adzerk4_4_16_19,adzerk5_4_16_19,adzerk6_4_16_19,adzerk7_4_16_19,adzerk8_4_16_19"></script>
|
363
|
-
<script type="text/javascript" src="http://www.rubyinside.com/wp-content/themes/ri2011/jquery.boastful.js"></script>
|
364
|
-
|
365
|
-
<script type="text/javascript">
|
366
|
-
$(document).ready(function() {
|
367
|
-
$('#boastful').boastful();
|
368
|
-
});
|
369
|
-
</script>
|
370
|
-
|
371
|
-
<script type="text/javascript">
|
372
|
-
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
373
|
-
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
374
|
-
</script>
|
375
|
-
<script type="text/javascript">
|
376
|
-
var pageTracker = _gat._getTracker("UA-2237791-3");
|
377
|
-
pageTracker._initData();
|
378
|
-
pageTracker._trackPageview();
|
379
|
-
</script>
|
380
|
-
|
381
|
-
|
382
|
-
</body>
|
383
|
-
</html>
|
384
|
-
|
385
|
-
<!-- div style="float:right"><a href="http://twitter.com/share" class="twitter-share-button" data-url="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html" data-counturl="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html" data-text="Ruby’s Unary Operators and How to Define Their Functionality" data-count="horizontal">Tweet</a></div -->
|
386
|
-
<!-- div style="margin-bottom: -12px; margin-top: -10px"><iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fwww.rubyinside.com%2Frubys-unary-operators-and-how-to-redefine-their-functionality-5610.html&layout=standard&show_faces=false&width=420&action=like&colorscheme=light&height=26" scrolling="no" frameborder="0" style="border:none; overflow:hidden; width:420px; height:26px;" allowTransparency="true"></iframe></div -->
|
387
|
-
<!-- div><script src="http://connect.facebook.net/en_US/all.js#xfbml=1"></script><fb:like href="http://x.com/" show_faces="false" width="450"></fb:like></div -->
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
<!-- Dynamic page generated in 0.233 seconds. -->
|
392
|
-
<!-- Cached page generated by WP-Super-Cache on 2013-01-04 01:31:41 -->
|
393
|
-
<!-- super cache -->
|
@@ -1,36 +0,0 @@
|
|
1
|
-
class Treat::Workers::Extractors::Similarity
|
2
|
-
# Default options.
|
3
|
-
DefaultOptions = {
|
4
|
-
with: '',
|
5
|
-
ins_cost: 1,
|
6
|
-
del_cost: 1,
|
7
|
-
sub_cost: 1
|
8
|
-
}
|
9
|
-
# Return the Levenshtein distance between
|
10
|
-
# two strings taking into account the costs
|
11
|
-
# of insertion, deletion, and substitution.
|
12
|
-
# Used by did_you_mean? to detect typos.
|
13
|
-
def self.similarity(entity, options)
|
14
|
-
first, other = entity.to_s, options[:with].to_s
|
15
|
-
options = DefaultOptions.merge(options)
|
16
|
-
other, ins, del, sub, = options[:with],
|
17
|
-
options[:inst_cost], options[:del_cost],
|
18
|
-
options[:sub_cost]
|
19
|
-
fill, dm = [0] * (first.length - 1).abs,
|
20
|
-
[(0..first.length).collect { |i| i * ins}]
|
21
|
-
for i in 1..other.length
|
22
|
-
dm[i] = [i * del, fill.flatten]
|
23
|
-
end
|
24
|
-
for i in 1..other.length
|
25
|
-
for j in 1..first.length
|
26
|
-
dm[i][j] = [
|
27
|
-
dm[i-1][j-1] + (first[i-1] ==
|
28
|
-
other[i-1] ? 0 : sub), dm[i][j-1] +
|
29
|
-
ins, dm[i-1][j] + del
|
30
|
-
].min
|
31
|
-
end
|
32
|
-
end
|
33
|
-
dm[other.length][first.length]
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
data/spec/sandbox.rb
DELETED
@@ -1,294 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require_relative '../lib/treat'
|
3
|
-
|
4
|
-
Treat.databases.mongo.db = 'treat_test'
|
5
|
-
Treat.libraries.stanford.model_path =
|
6
|
-
'/ruby/stanford-core-nlp-minimal/models/'
|
7
|
-
Treat.libraries.stanford.jar_path =
|
8
|
-
'/ruby/stanford-core-nlp-minimal/bin/'
|
9
|
-
Treat.libraries.punkt.model_path =
|
10
|
-
'/ruby/punkt/models/'
|
11
|
-
Treat.libraries.reuters.model_path =
|
12
|
-
'/ruby/reuters/models/'
|
13
|
-
|
14
|
-
# include Treat::Core::DSL
|
15
|
-
|
16
|
-
Treat::Builder.new do
|
17
|
-
s = sentence "Hello, world!"
|
18
|
-
s.print_tree
|
19
|
-
end
|
20
|
-
|
21
|
-
p = paragraph('A walk in the park. A trip on a boat.').segment
|
22
|
-
p.visualize :dot, file: 'test.dot'
|
23
|
-
=begin
|
24
|
-
|
25
|
-
g = group("I was running")
|
26
|
-
puts g.tag.inspect
|
27
|
-
|
28
|
-
Treat.libraries.stanford.jar_path = '/ruby/treat/bin/'
|
29
|
-
Treat.libraries.stanford.model_path = '/ruby/treat/models/'
|
30
|
-
|
31
|
-
p = paragraph
|
32
|
-
s = sentence
|
33
|
-
w = word
|
34
|
-
|
35
|
-
p = phrase 'hello world'
|
36
|
-
e = email 'louis@gmail.com'
|
37
|
-
|
38
|
-
d = question(:is_feature, :word)
|
39
|
-
=end
|
40
|
-
#d = document Treat.paths.spec + 'workers/examples/english/economist/hungarys_troubles.txt'
|
41
|
-
#d.apply :chunk, :segment, :tokenize, :tag, :category, :name_tag
|
42
|
-
#d.print_tree
|
43
|
-
#d = document Treat.paths.spec + 'workers/examples/english/economist/saving_the_euro.odt'
|
44
|
-
#d.print_tree
|
45
|
-
=begin
|
46
|
-
d = document 'test.htm'
|
47
|
-
d.apply :chunk
|
48
|
-
#d.serialize :yaml, file: 'test444.yaml'
|
49
|
-
d.set :test, 2
|
50
|
-
d.serialize :mongo, db: 'test'
|
51
|
-
d.set :test, 3
|
52
|
-
d.serialize :mongo, db: 'test'
|
53
|
-
d.apply :segment, :tokenize, :tag, :category
|
54
|
-
puts d.verb_count
|
55
|
-
#d2 = document id: d.id, db: 'test'
|
56
|
-
d2 = document 'features.test' => 3, db: 'test'
|
57
|
-
d2.apply :segment, :tokenize, :tag, :category
|
58
|
-
puts d2.verb_count
|
59
|
-
#d.print_tree
|
60
|
-
#s = document 'http://www.economist.com'
|
61
|
-
|
62
|
-
p = phrase 'hello', 'world', '!'
|
63
|
-
puts p.to_s
|
64
|
-
puts p.to_str
|
65
|
-
=end
|
66
|
-
|
67
|
-
=begin
|
68
|
-
### Super basics.
|
69
|
-
puts p.value
|
70
|
-
|
71
|
-
p << 'bitch'
|
72
|
-
p << word('hello')
|
73
|
-
puts p.to_s
|
74
|
-
puts p.to_str
|
75
|
-
puts p.value
|
76
|
-
puts p.to_ary.inspect
|
77
|
-
=end
|
78
|
-
|
79
|
-
=begin
|
80
|
-
|
81
|
-
### Configuration
|
82
|
-
|
83
|
-
# A boolean value indicating whether to silence the output of external libraries (e.g. Stanford tools, Enju, LDA, Ruby-FANN) when they are used.
|
84
|
-
puts Treat.core.verbosity.silence
|
85
|
-
# A boolean value indicating whether to explain the steps that Treat is performing.
|
86
|
-
puts Treat.core.verbosity.debug
|
87
|
-
# A boolean value indicating whether Treat should try to detect the language of newly input text.
|
88
|
-
puts Treat.core.language.detect
|
89
|
-
# The language to default to when detection is off.
|
90
|
-
puts Treat.core.language.default
|
91
|
-
# A symbol representing the finest level at which language detection should be performed if language detection is turned on.
|
92
|
-
puts Treat.core.language.detect_at
|
93
|
-
|
94
|
-
# A directory in which to create temporary files.
|
95
|
-
puts Treat.paths.tmp
|
96
|
-
# A directory in which to store downloaded files.
|
97
|
-
puts Treat.paths.files
|
98
|
-
# A directory containing trained models for various tasks.
|
99
|
-
puts Treat.paths.models
|
100
|
-
# A directory containing the spec files.
|
101
|
-
puts Treat.paths.spec
|
102
|
-
# A directory containing executables and JAR files.
|
103
|
-
puts Treat.paths.bin
|
104
|
-
puts Treat.paths.lib
|
105
|
-
|
106
|
-
# Set up Mongoid.
|
107
|
-
Treat.databases.mongo.db = 'your_database'
|
108
|
-
Treat.databases.mongo.host = 'localhost'
|
109
|
-
Treat.databases.mongo.port = '27017'
|
110
|
-
|
111
|
-
# Transparent string casting.
|
112
|
-
s = 'inflection'.stem
|
113
|
-
# is equivalent to
|
114
|
-
s = 'inflection'.to_entity.stem
|
115
|
-
# which comes down to
|
116
|
-
s = word('inflection').stem
|
117
|
-
|
118
|
-
# Transparent number casting.
|
119
|
-
n = 2.ordinal
|
120
|
-
# is equivalent to
|
121
|
-
s = 2.to_entity.ordinal
|
122
|
-
# which comes down to
|
123
|
-
s = number(2).ordinal
|
124
|
-
=end
|
125
|
-
=begin
|
126
|
-
### BASIC USAGE
|
127
|
-
|
128
|
-
# Create a sentence
|
129
|
-
s = sentence 'Those who dream by day know of at least ' +
|
130
|
-
'19 things that escape those who dream only at night.'
|
131
|
-
|
132
|
-
# Tokenize and tag it.
|
133
|
-
s.tokenize.tag
|
134
|
-
|
135
|
-
# View the sentence structure.
|
136
|
-
s.print_tree
|
137
|
-
|
138
|
-
# Iterate over the tokens.
|
139
|
-
s.each_token do |tok|
|
140
|
-
puts tok.value
|
141
|
-
puts tok.type
|
142
|
-
end
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
# Arrays instead of iterators.
|
147
|
-
(s.nouns + s.adjectives).each do |word|
|
148
|
-
puts word.synonyms
|
149
|
-
puts word.antonyms
|
150
|
-
end
|
151
|
-
|
152
|
-
# Functions on numbers.
|
153
|
-
s.each_number do |num|
|
154
|
-
puts num.ordinal
|
155
|
-
puts num.cardinal
|
156
|
-
end
|
157
|
-
|
158
|
-
# See all the annotations.
|
159
|
-
s.each do |tok|
|
160
|
-
puts tok.inspect
|
161
|
-
end
|
162
|
-
|
163
|
-
# Lazy way of doing all of the above.
|
164
|
-
s = sentence 'Those who dream by day know of at least ' +
|
165
|
-
'19 things that escape those who dream only at night.'
|
166
|
-
|
167
|
-
s.apply :tokenize, :tag, :category,
|
168
|
-
:stem, :hyponyms, :hypernyms,
|
169
|
-
:antonyms, :ordinal, :cardinal
|
170
|
-
|
171
|
-
=end
|
172
|
-
|
173
|
-
=begin
|
174
|
-
### A BIT MORE ADVANCED USAGE
|
175
|
-
|
176
|
-
section = section "Obama-Sarkozy Meeting\n" +
|
177
|
-
"Obama and Sarkozy met on January 1st to investigate " +
|
178
|
-
"the possibility of a new rescue plan. President " +
|
179
|
-
"Sarkozy is to meet Merkel next Tuesday in Berlin."
|
180
|
-
|
181
|
-
# Chunk: split the titles and paragraphs.
|
182
|
-
# Segment: perform sentence segmentation.
|
183
|
-
# Parse: parse the syntax of each sentence.
|
184
|
-
section.apply :chunk, :segment, :parse
|
185
|
-
|
186
|
-
# View the tree structure.
|
187
|
-
section.print_tree
|
188
|
-
|
189
|
-
# Get some basic info on the text.
|
190
|
-
puts section.title
|
191
|
-
puts section.sentence_count
|
192
|
-
puts section.word_count
|
193
|
-
|
194
|
-
section.apply :category
|
195
|
-
puts section.noun_count
|
196
|
-
puts section.frequency_of 'president'
|
197
|
-
|
198
|
-
section.each_phrase_with_tag('NP') do |phrase|
|
199
|
-
puts phrase.to_s
|
200
|
-
end
|
201
|
-
|
202
|
-
=end
|
203
|
-
=begin
|
204
|
-
### URL documents, XML serialization.
|
205
|
-
|
206
|
-
urls = ['http://www.cbc.ca/news/world/story/2012/11/25/snc-lavalin-ben-aissa-charges.html',
|
207
|
-
'http://www.cbc.ca/news/world/story/2012/11/25/egypt.html', 'http://www.cbc.ca/news/canada/prince-edward-island/story/2012/11/25/pei-murder-arrest-stlucia.html', 'http://www.cbc.ca/news/world/story/2012/11/25/bangladesh-garment-factory-fire.html']
|
208
|
-
|
209
|
-
c = collection
|
210
|
-
urls.each { |url| c << document(url) }
|
211
|
-
|
212
|
-
# View the collection.
|
213
|
-
c.print_tree
|
214
|
-
|
215
|
-
c.apply :chunk, :segment, :tokenize
|
216
|
-
c.serialize :xml, :file => 'test.xml'
|
217
|
-
|
218
|
-
# Reopen the collection.
|
219
|
-
c = collection 'test.xml'
|
220
|
-
|
221
|
-
# View it again.
|
222
|
-
c.print_tree
|
223
|
-
=end
|
224
|
-
=begin
|
225
|
-
include Treat::Core::DSL
|
226
|
-
|
227
|
-
# Show progress bars for download.
|
228
|
-
Treat.core.verbosity.silence = false
|
229
|
-
# Explain what Treat is doing.
|
230
|
-
Treat.core.verbosity.debug = true
|
231
|
-
|
232
|
-
# Define the question "is it junk?" on sentences.
|
233
|
-
qn = question(:is_junk, :sentence)
|
234
|
-
|
235
|
-
# Frame the problem as depending on punctuation
|
236
|
-
# count and word count for each sentence.
|
237
|
-
pb = problem(qn,
|
238
|
-
feature(:punctuation_count),
|
239
|
-
feature(:word_count) )
|
240
|
-
|
241
|
-
# Get some web documents to work on.
|
242
|
-
url1 = 'http://en.wikipedia.org/wiki/NOD_mouse'
|
243
|
-
url2 = 'http://en.wikipedia.org/wiki/Academic_studies_about_Wikipedia'
|
244
|
-
d1, d2 = document(url1), document(url2)
|
245
|
-
|
246
|
-
# Process both of our documents.
|
247
|
-
[d1,d2].apply(:chunk, :segment, :tokenize)
|
248
|
-
|
249
|
-
# Answer our problem to create a training set.
|
250
|
-
d1.sentences[0..17].each { |s| s.set :is_junk, 0 }
|
251
|
-
d1.sentences[17..-1].each { |s| s.set :is_junk, 1 }
|
252
|
-
d_set = d1.export(pb)
|
253
|
-
|
254
|
-
# Define our gold standard results for evaluation.
|
255
|
-
d2.sentences[0..81].each { |s| s.set :is_true_junk, 0 }
|
256
|
-
d2.sentences[81..-1].each { |s| s.set :is_true_junk, 1 }
|
257
|
-
|
258
|
-
tp, fp, tn, fn = 0.0, 0.0, 0.0, 0.0
|
259
|
-
|
260
|
-
d2.sentences.map do |s|
|
261
|
-
pred = s.classify(:id3, training: d_set)
|
262
|
-
if pred == 1
|
263
|
-
tp += 1 if s.is_true_junk == 1
|
264
|
-
fp += 1 if s.is_true_junk == 0
|
265
|
-
else
|
266
|
-
tn += 1 if s.is_true_junk == 0
|
267
|
-
fn += 1 if s.is_true_junk == 1
|
268
|
-
end
|
269
|
-
end
|
270
|
-
|
271
|
-
puts "Precision: #{tp/(tp + fp)}"
|
272
|
-
puts "Recall: #{tp/(tp + fn)}"
|
273
|
-
=end
|
274
|
-
=begin
|
275
|
-
d = document 'http://louismullie.com/susan-text-scan1.jpg'
|
276
|
-
d.apply :chunk, :segment, :tokenize
|
277
|
-
d.print_tree
|
278
|
-
=end
|
279
|
-
=begin
|
280
|
-
# Syntax example
|
281
|
-
phra = phrase 'Obama', 'Sarkozy', 'Meeting'
|
282
|
-
|
283
|
-
para = paragraph 'Obama and Sarkozy met on January 1st to'
|
284
|
-
'investigate the possibility of a new rescue plan. Nicolas ' +
|
285
|
-
'Sarkozy is to meet Merkel next Tuesday in Berlin.'
|
286
|
-
|
287
|
-
sect = section title(phra), para
|
288
|
-
=end
|
289
|
-
=begin
|
290
|
-
puts "beer".plural.inspect
|
291
|
-
=end
|
292
|
-
# Treat.core.language.detect = true
|
293
|
-
# s = sentence "Du hast deiner Frau einen roten Ring gekauft."
|
294
|
-
#s.apply(:parse,:category).print_tree
|