treat 2.0.3 → 2.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/treat/config/data/languages/agnostic.rb +6 -3
- data/lib/treat/config/data/languages/english.rb +1 -1
- data/lib/treat/config/data/workers/extractors.rb +8 -0
- data/lib/treat/loaders/stanford.rb +2 -0
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/extractors/distance/levenshtein.rb +35 -0
- data/lib/treat/workers/extractors/name_tag/stanford.rb +4 -1
- data/lib/treat/workers/extractors/similarity/jaro_winkler.rb +38 -0
- data/lib/treat/workers/extractors/similarity/tf_idf.rb +19 -3
- data/lib/treat/workers/extractors/time/chronic.rb +6 -41
- data/lib/treat/workers/extractors/time/kronic.rb +20 -0
- data/lib/treat/workers/extractors/time/nickel.rb +0 -15
- data/lib/treat/workers/extractors/time/ruby.rb +2 -33
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +11 -10
- data/lib/treat/workers/processors/parsers/stanford.rb +60 -112
- data/spec/entities/collection.rb +29 -25
- data/spec/entities/document.rb +45 -44
- data/spec/entities/entity.rb +295 -294
- data/spec/entities/phrase.rb +21 -17
- data/spec/entities/token.rb +43 -40
- data/spec/entities/word.rb +5 -1
- data/spec/entities/zone.rb +26 -22
- data/spec/helper.rb +7 -2
- data/spec/learning/data_set.rb +145 -141
- data/spec/learning/export.rb +46 -42
- data/spec/learning/problem.rb +114 -110
- data/spec/learning/question.rb +46 -42
- data/spec/treat.rb +41 -37
- data/spec/workers/agnostic.rb +2 -2
- data/spec/workers/english.rb +12 -12
- metadata +7 -8
- data/files/21552208.html +0 -786
- data/files/nethttp-cheat-sheet-2940.html +0 -393
- data/lib/treat/workers/extractors/similarity/levenshtein.rb +0 -36
- data/spec/sandbox.rb +0 -294
- data/spec/workers/examples/english/mathematicians/euler.html +0 -21
@@ -1,393 +0,0 @@
|
|
1
|
-
<!doctype html>
|
2
|
-
|
3
|
-
<!--[if lt IE 7 ]> <html lang="en" class="ie6"> <![endif]-->
|
4
|
-
<!--[if IE 7 ]> <html lang="en" class="ie7"> <![endif]-->
|
5
|
-
<!--[if IE 8 ]> <html lang="en" class="ie8"> <![endif]-->
|
6
|
-
<!--[if IE 9 ]> <html lang="en" class="ie9"> <![endif]-->
|
7
|
-
<!--[if (gt IE 9)|!(IE)]><!--> <html lang="en" class="no-js"> <!--<![endif]-->
|
8
|
-
|
9
|
-
<head>
|
10
|
-
<meta charset="utf-8">
|
11
|
-
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
|
12
|
-
<title>Net::HTTP Cheat Sheet</title>
|
13
|
-
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
14
|
-
<link rel="icon" href="/favicon.png" type="image/png">
|
15
|
-
<link rel="shortcut icon" href="/favicon.png" type="image/png">
|
16
|
-
<link rel="alternate" type="application/rss+xml" title="Ruby Inside" href="http://www.rubyinside.com/feed/" />
|
17
|
-
<link rel="stylesheet" href="http://www.rubyinside.com/wp-content/themes/ri2011/css/ri.css">
|
18
|
-
<!-- script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.4.1/jquery.min.js"></script -->
|
19
|
-
<script type="text/javascript" src="http://use.typekit.com/dmj7czx.js"></script>
|
20
|
-
<script type="text/javascript">try{Typekit.load();}catch(e){}</script>
|
21
|
-
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.7.0/jquery.min.js"></script>
|
22
|
-
|
23
|
-
<!--[if lt IE 9]>
|
24
|
-
<script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script>
|
25
|
-
<![endif]-->
|
26
|
-
</head>
|
27
|
-
|
28
|
-
<body class="single single-post postid-2940 single-format-standard">
|
29
|
-
<div id="superheader">Want to stay on top? <a href="http://rubyweekly.com/?m">Ruby Weekly</a> is a once-weekly e-mail newsletter covering the latest Ruby and Rails news.</div>
|
30
|
-
<div id="container">
|
31
|
-
<div class="outerheader top">
|
32
|
-
<div class="right"><form method="get" id="searchform" action="/"><input type="text" value="" name="s" id="s" size="24" /><input type="submit" id="searchsubmit" value="Search" /></form> <a href="http://www.rubyinside.com/feed/"><img src="http://feeds2.feedburner.com/~fc/RubyInside?bg=99CCFF&fg=333333&anim=0" align="top" alt="Feed Icon" /></a></div>
|
33
|
-
<a href="/"><img src="http://www.rubyinside.com/wp-content/themes/ri2011/css/logo.png" id="logo" alt="Ruby Inside - A Ruby Blog" /></a>
|
34
|
-
</div>
|
35
|
-
|
36
|
-
<div class="outerheader masthead">
|
37
|
-
<div class="menu">
|
38
|
-
<ul>
|
39
|
-
<li><a href="/" class="home">Home</a></li> <li><a href="/about/">About</a></li>
|
40
|
-
<li><a href="/archives/">Archives</a></li>
|
41
|
-
<!-- li><a href="/advertise/">Advertising</a></li -->
|
42
|
-
<li><a href="http://jobs.rubyinside.com/">Ruby and Rails Jobs</a></li>
|
43
|
-
<li class="right"><a href="/feed/">RSS</a> <!-- span class="count">(24146)</span --></li>
|
44
|
-
<li class="right">
|
45
|
-
</li>
|
46
|
-
<li class="right"><a href="http://twitter.com/rubyinside">Follow us on Twitter</a> <!-- span class="count">(5548)</span --></li>
|
47
|
-
</ul>
|
48
|
-
</div>
|
49
|
-
|
50
|
-
|
51
|
-
</div>
|
52
|
-
|
53
|
-
<div id="innercontainer"><div id="page">
|
54
|
-
|
55
|
-
|
56
|
-
<div id="sidebar">
|
57
|
-
<div style="margin-top: 12px"><a href="http://twitter.com/RubyInside" class="twitter-follow-button">Follow @RubyInside</a>
|
58
|
-
<script src="http://platform.twitter.com/widgets.js" type="text/javascript"></script></div>
|
59
|
-
<div id="execphp-3" class="widget-container section widget_execphp">
|
60
|
-
<div class="execphpwidget"> <h3><a href="http://jobs.rubyinside.com/">Ruby and Rails Jobs</a></h3>
|
61
|
-
<div class="inner">
|
62
|
-
<ul>
|
63
|
-
<!-- ? readfile('http://www.rubyinside.com/jobs.html'); ? -->
|
64
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/778380">Front-End Engineer</a></div><div class='company'>New Relic</div><div class="location">San Francisco, California</div></li>
|
65
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/780028">Ruby on Rails Developer @WeedMaps</a></div><div class='company'>WeedMaps</div><div class="location">Denver, Colorado</div></li>
|
66
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/766252">Ruby on Rails Developer</a></div><div class='company'>CSD</div><div class="location">Austin, Texas</div></li>
|
67
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/776046">Front-End Developer</a></div><div class='company'>Bedrocket Media Ventures</div><div class="location">New York, New York</div></li>
|
68
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/769030">Telecommute-Principal Ruby Consultant</a></div><div class='company'>IMPRTL Inc</div><div class="location">Chicago, Illinois</div></li>
|
69
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/772705">Senior Rails Developer</a></div><div class='company'>Lightspeed Systems</div><div class="location">Austin, Texas</div></li>
|
70
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/776045">Lead Software Developer</a></div><div class='company'>Bedrocket Media Ventures</div><div class="location">New York, New York</div></li>
|
71
|
-
<li><div class='link'><a href="http://jobs.rubyinside.com/a/jbb/job-details/771110">Gifted RoR Software Engineer Technologist</a></div><div class='company'>Chelsmore Apartments</div><div class="location">New York, New York</div></li>
|
72
|
-
<div class="minor"><a href="http://ruby.jobamatic.com/">More jobs »</a> : <a href="/post-a-job">Post a Job</a></div>
|
73
|
-
</div>
|
74
|
-
</div>
|
75
|
-
</div><div id="text-3" class="widget-container section widget_text"> <div class="textwidget"><!-- a href="http://owningrails.com/?ref=5905208113"><img src="http://owningrails.com/images/ad-with.png" /></a --><!-- a href="http://rubyweekly.com/"><img src="http://rubyinside.com/wp-content/themes/ri20102/images/rw-ri-box.gif" /></a --><!-- a href="https://cooperpress.com/19walkthrough"><img src="/images/ruby19wt2.gif" /></a --><!-- a href="https://cooperpress.com/rubyreloaded"><img src="http://rubyinside.com/images/reloaded-riad.png" /></a --></div>
|
76
|
-
</div>
|
77
|
-
|
78
|
-
<!-- if (is_front_page() && !is_paged()) { -->
|
79
|
-
</div>
|
80
|
-
|
81
|
-
<div id="main">
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
<div class="post-2940 post type-post status-publish format-standard hentry category-reference" id="post-2940">
|
86
|
-
|
87
|
-
<div class="content">
|
88
|
-
|
89
|
-
<div class="title"><h2>Net::HTTP Cheat Sheet</h2></div>
|
90
|
-
|
91
|
-
<p class="author">By <a href="http://www.rubyinside.com/author/admin" title="View all posts by Peter Cooper">Peter Cooper</a> <span class="date">/ January 16, 2010</span></p>
|
92
|
-
<div id="thecontent"><p><img src="http://www.rubyinside.com/wp-content/uploads/2010/01/http-is-a-hamster-on-rollerblades-says-marc-andre-cournoyer.gif" width="120" height="120" alt="http-is-a-hamster-on-rollerblades-says-marc-andre-cournoyer.gif" style="float:left; margin-right:12px; margin-bottom:12px; border:1px #000000 solid;" />Norwegian Rubyist <a href="http://august.lilleaas.net/">August Lilleaas</a> has been busy putting together <a href="http://github.com/augustl/net-http-cheat-sheet">a ton of examples</a> of using the <a href="http://ruby-doc.org/stdlib/libdoc/net/http/rdoc/index.html">Net::HTTP</a> Ruby library that comes with most Ruby distributions. I asked him if it'd be okay to put some of them directly on Ruby Inside for reference purposes and he said "No problem!"</p>
|
93
|
-
<p>It's worth noting that Net::HTTP has been superseded in many areas by libraries like John Nunemaker's <a href="http://github.com/jnunemaker/httparty">HTTParty</a> and Paul Dix's high-performance <a href="http://github.com/pauldix/typhoeus">Typhoeus</a>, but as part of the standard library, Net::HTTP is still a popular option though it doesn't have the easiest API to remember.</p>
|
94
|
-
<p>Here's a selection of August's examples for some of the most common operations. Want to see <i>all</i> of the examples and follow any updates made to them? Check out August's <a href="http://github.com/augustl/net-http-cheat-sheet">net-http-cheat-sheet GitHub repo</a>.</p>
|
95
|
-
<h3>Standard HTTP Request</h3>
|
96
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
97
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
98
|
-
|
99
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
|
100
|
-
|
101
|
-
<span class="comment"># Shortcut</span>
|
102
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">get_response</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">)</span>
|
103
|
-
|
104
|
-
<span class="comment"># Will print response.body</span>
|
105
|
-
<span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">get_print</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">)</span>
|
106
|
-
|
107
|
-
<span class="comment"># Full</span>
|
108
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
109
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">))</span></pre>
|
110
|
-
<h3>Basic Auth</h3>
|
111
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
112
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
113
|
-
|
114
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
|
115
|
-
|
116
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
117
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
|
118
|
-
<span class="ident">request</span><span class="punct">.</span><span class="ident">basic_auth</span><span class="punct">("</span><span class="string">username</span><span class="punct">",</span> <span class="punct">"</span><span class="string">password</span><span class="punct">")</span>
|
119
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
|
120
|
-
<h3>Dealing with response objects</h3>
|
121
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
122
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
123
|
-
|
124
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://google.com/</span><span class="punct">")</span>
|
125
|
-
|
126
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
127
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
|
128
|
-
|
129
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
|
130
|
-
|
131
|
-
<span class="ident">response</span><span class="punct">.</span><span class="ident">code</span> <span class="comment"># => 301</span>
|
132
|
-
<span class="ident">response</span><span class="punct">.</span><span class="ident">body</span> <span class="comment"># => The body (HTML, XML, blob, whatever)</span>
|
133
|
-
<span class="comment"># Headers are lowercased</span>
|
134
|
-
<span class="ident">response</span><span class="punct">["</span><span class="string">cache-control</span><span class="punct">"]</span> <span class="comment"># => public, max-age=2592000</span></pre>
|
135
|
-
<h3>POST form request</h3>
|
136
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
137
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
138
|
-
|
139
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://example.com/search</span><span class="punct">")</span>
|
140
|
-
|
141
|
-
<span class="comment"># Shortcut</span>
|
142
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">post_form</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">,</span> <span class="punct">{"</span><span class="string">q</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">My query</span><span class="punct">",</span> <span class="punct">"</span><span class="string">per_page</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">50</span><span class="punct">"})</span>
|
143
|
-
|
144
|
-
<span class="comment"># Full control</span>
|
145
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
146
|
-
|
147
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
|
148
|
-
<span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">q</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">My query</span><span class="punct">",</span> <span class="punct">"</span><span class="string">per_page</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">50</span><span class="punct">"})</span>
|
149
|
-
|
150
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
|
151
|
-
<h3>File upload - input type="file" style</h3>
|
152
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
153
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
154
|
-
|
155
|
-
<span class="comment"># Token used to terminate the file in the post body. Make sure it is not</span>
|
156
|
-
<span class="comment"># present in the file you're uploading.</span>
|
157
|
-
<span class="constant">BOUNDARY</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">AaB03x</span><span class="punct">"</span>
|
158
|
-
|
159
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">http://something.com/uploads</span><span class="punct">")</span>
|
160
|
-
<span class="ident">file</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">/path/to/your/testfile.txt</span><span class="punct">"</span>
|
161
|
-
|
162
|
-
<span class="ident">post_body</span> <span class="punct">=</span> <span class="punct">[]</span>
|
163
|
-
<span class="ident">post_body</span> <span class="punct"><<</span> <span class="punct">"</span><span class="string">--<span class="expr">#{BOUNDARY}</span><span class="escape">\r\n</span></span><span class="punct">"</span>
|
164
|
-
<span class="ident">post_body</span> <span class="punct"><<</span> <span class="punct">"</span><span class="string">Content-Disposition: form-data; name=<span class="escape">\"</span>datafile<span class="escape">\"</span>; filename=<span class="escape">\"</span><span class="expr">#{File.basename(file)}</span><span class="escape">\"\r\n</span></span><span class="punct">"</span>
|
165
|
-
<span class="ident">post_body</span> <span class="punct"><<</span> <span class="punct">"</span><span class="string">Content-Type: text/plain<span class="escape">\r\n</span></span><span class="punct">"</span>
|
166
|
-
<span class="ident">post_body</span> <span class="punct"><<</span> <span class="punct">"</span><span class="string"><span class="escape">\r\n</span></span><span class="punct">"</span>
|
167
|
-
<span class="ident">post_body</span> <span class="punct"><<</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span><span class="ident">file</span><span class="punct">)</span>
|
168
|
-
<span class="ident">post_body</span> <span class="punct"><<</span> <span class="punct">"</span><span class="string"><span class="escape">\r\n</span>--<span class="expr">#{BOUNDARY}</span>--<span class="escape">\r\n</span></span><span class="punct">"</span>
|
169
|
-
|
170
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
171
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
|
172
|
-
<span class="ident">request</span><span class="punct">.</span><span class="ident">body</span> <span class="punct">=</span> <span class="ident">post_body</span><span class="punct">.</span><span class="ident">join</span>
|
173
|
-
<span class="ident">request</span><span class="punct">["</span><span class="string">Content-Type</span><span class="punct">"]</span> <span class="punct">=</span> <span class="punct">"</span><span class="string">multipart/form-data, boundary=<span class="expr">#{BOUNDARY}</span></span><span class="punct">"</span>
|
174
|
-
|
175
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
|
176
|
-
<h3>SSL/HTTPS request</h3>
|
177
|
-
<p><strong>Update: There are some good reasons why this code example is bad. It introduces a potential security vulnerability if it's essential you use the server certificate to verify the identity of the server you're connecting to. There's <a href="http://www.rubyinside.com/how-to-cure-nethttps-risky-default-https-behavior-4010.html">a fix for the issue though!</a></strong></p>
|
178
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/https</span><span class="punct">"</span>
|
179
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
180
|
-
|
181
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">https://secure.com/</span><span class="punct">")</span>
|
182
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
183
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">use_ssl</span> <span class="punct">=</span> <span class="constant">true</span>
|
184
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">verify_mode</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">SSL</span><span class="punct">::</span><span class="constant">VERIFY_NONE</span>
|
185
|
-
|
186
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span>
|
187
|
-
|
188
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
|
189
|
-
<span class="ident">response</span><span class="punct">.</span><span class="ident">body</span>
|
190
|
-
<span class="ident">response</span><span class="punct">.</span><span class="ident">status</span>
|
191
|
-
<span class="ident">response</span><span class="punct">["</span><span class="string">header-here</span><span class="punct">"]</span> <span class="comment"># All headers are lowercase</span></pre>
|
192
|
-
<h3>SSL/HTTPS request with PEM certificate</h3>
|
193
|
-
<pre><span class="ident">require</span> <span class="punct">"</span><span class="string">net/https</span><span class="punct">"</span>
|
194
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">uri</span><span class="punct">"</span>
|
195
|
-
|
196
|
-
<span class="ident">uri</span> <span class="punct">=</span> <span class="constant">URI</span><span class="punct">.</span><span class="ident">parse</span><span class="punct">("</span><span class="string">https://secure.com/</span><span class="punct">")</span>
|
197
|
-
<span class="ident">pem</span> <span class="punct">=</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">("</span><span class="string">/path/to/my.pem</span><span class="punct">")</span>
|
198
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">host</span><span class="punct">,</span> <span class="ident">uri</span><span class="punct">.</span><span class="ident">port</span><span class="punct">)</span>
|
199
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">use_ssl</span> <span class="punct">=</span> <span class="constant">true</span>
|
200
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">cert</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">X509</span><span class="punct">::</span><span class="constant">Certificate</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">pem</span><span class="punct">)</span>
|
201
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">key</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">PKey</span><span class="punct">::</span><span class="constant">RSA</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">pem</span><span class="punct">)</span>
|
202
|
-
<span class="ident">http</span><span class="punct">.</span><span class="ident">verify_mode</span> <span class="punct">=</span> <span class="constant">OpenSSL</span><span class="punct">::</span><span class="constant">SSL</span><span class="punct">::</span><span class="constant">VERIFY_PEER</span>
|
203
|
-
|
204
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">uri</span><span class="punct">.</span><span class="ident">request_uri</span><span class="punct">)</span></pre>
|
205
|
-
<h3>REST methods</h3>
|
206
|
-
<pre><span class="comment"># Basic REST.</span>
|
207
|
-
<span class="comment"># Most REST APIs will set semantic values in response.body and response.code.</span>
|
208
|
-
<span class="ident">require</span> <span class="punct">"</span><span class="string">net/http</span><span class="punct">"</span>
|
209
|
-
|
210
|
-
<span class="ident">http</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">api.restsite.com</span><span class="punct">")</span>
|
211
|
-
|
212
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Post</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users</span><span class="punct">")</span>
|
213
|
-
<span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">users[login]</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">quentin</span><span class="punct">"})</span>
|
214
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
|
215
|
-
<span class="comment"># Use nokogiri, hpricot, etc to parse response.body.</span>
|
216
|
-
|
217
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Get</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
|
218
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
|
219
|
-
<span class="comment"># As with POST, the data is in response.body.</span>
|
220
|
-
|
221
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Put</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
|
222
|
-
<span class="ident">request</span><span class="punct">.</span><span class="ident">set_form_data</span><span class="punct">({"</span><span class="string">users[login]</span><span class="punct">"</span> <span class="punct">=></span> <span class="punct">"</span><span class="string">changed</span><span class="punct">"})</span>
|
223
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span>
|
224
|
-
|
225
|
-
<span class="ident">request</span> <span class="punct">=</span> <span class="constant">Net</span><span class="punct">::</span><span class="constant">HTTP</span><span class="punct">::</span><span class="constant">Delete</span><span class="punct">.</span><span class="ident">new</span><span class="punct">("</span><span class="string">/users/1</span><span class="punct">")</span>
|
226
|
-
<span class="ident">response</span> <span class="punct">=</span> <span class="ident">http</span><span class="punct">.</span><span class="ident">request</span><span class="punct">(</span><span class="ident">request</span><span class="punct">)</span></pre>
|
227
|
-
<p>There are more in August's repo if you want to keep browsing..</p>
|
228
|
-
<p style="background-color: #ffc; font-weight: bold; font-size: 13px; color: #000;">Job: New Relic is <a href="http://ruby.jobamatic.com/a/jbb/job-details/165476">looking for a Ruby on Rails developer in Portland, Oregon.</a></p>
|
229
|
-
</div>
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
<!-- div style="margin-bottom: 8px; background-color: #ffc; text-align: center; padding: 6px"><a href="http://zfer.us/EKm97" style="text-decoration: none; margin: 0; padding: 0" rel="nofollow"><img src="http://www.rubyinside.com/images/railstutorial-box.gif" /></a></div -->
|
235
|
-
<!-- <a href="http://www.rubyinside.com/19walkthrough/"><img src="http://www.rubyinside.com/images/19pro.gif" /></a> -->
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
</div>
|
242
|
-
</div>
|
243
|
-
|
244
|
-
<div id="related"><h3>Related Posts</h3><ul><li><a href='http://www.rubyinside.com/cheat-sheet-for-rest-on-rails-261.html' rel='bookmark' title='Cheat Sheet for REST on Rails'>Cheat Sheet for REST on Rails</a></li>
|
245
|
-
<li><a href='http://www.rubyinside.com/quick-ruby-reference-cheat-sheet-47.html' rel='bookmark' title='Quick Ruby Reference / Cheat Sheet'>Quick Ruby Reference / Cheat Sheet</a></li>
|
246
|
-
<li><a href='http://www.rubyinside.com/ruby-on-rails-testing-cheat-sheet-206.html' rel='bookmark' title='Ruby on Rails Testing Cheat Sheet'>Ruby on Rails Testing Cheat Sheet</a></li>
|
247
|
-
</ul></div>
|
248
|
-
<div id="commentzone">
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
<h3 id="comments-title">Comments</h3>
|
253
|
-
|
254
|
-
<ol class="commentlist">
|
255
|
-
<li class="comment even thread-even depth-1" id="comment-40542">
|
256
|
-
<img alt='' src='http://0.gravatar.com/avatar/6268c7528d855f1cef5696a00d159909?s=64&d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://twitter.com/peterc' rel='external nofollow' class='url'>Peter Cooper</a> says:</cite><br />
|
257
|
-
|
258
|
-
<small class="commentmetadata">January 16, 2010 at 1:02 am</small>
|
259
|
-
|
260
|
-
|
261
|
-
<p>The idea for the hamster on rollerskates issue can be discovered by checking out its filename.. :-)</p>
|
262
|
-
|
263
|
-
|
264
|
-
</li> <li class="comment odd alt thread-odd thread-alt depth-1" id="comment-40546">
|
265
|
-
<img alt='' src='http://0.gravatar.com/avatar/aa31b79adedc3f60547769f1a8971ba6?s=64&d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://emmanueloga@gmail.com' rel='external nofollow' class='url'>Emmanuel</a> says:</cite><br />
|
266
|
-
|
267
|
-
<small class="commentmetadata">January 16, 2010 at 3:50 pm</small>
|
268
|
-
|
269
|
-
|
270
|
-
<p>Anybody knows if the HTTP::Net issues related with Timeout is still present in all (or any) ruby versions?</p>
|
271
|
-
<p><a href="http://blog.headius.com/2008/02/rubys-threadraise-threadkill-timeoutrb.html" rel="nofollow">http://blog.headius.com/2008/02/rubys-threadraise-threadkill-timeoutrb.html</a></p>
|
272
|
-
|
273
|
-
|
274
|
-
</li> <li class="comment even thread-even depth-1" id="comment-40550">
|
275
|
-
<img alt='' src='http://0.gravatar.com/avatar/2bfc6436d28fc4a224e3ff1702a046d0?s=64&d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite>ratbeard says:</cite><br />
|
276
|
-
|
277
|
-
<small class="commentmetadata">January 17, 2010 at 4:23 pm</small>
|
278
|
-
|
279
|
-
|
280
|
-
<p>Another library that abstracts over Net::HTTP is Adam Wiggin's 'rest-client' library. It seems more targeted toward single requests (i.e. Restclient.get 'google.com') than mixing in and building a request class like HTTParty, though both libraries do variants of each style. It has a great interactive shell and request logging.</p>
|
281
|
-
<p><a href="http://github.com/archiloque/rest-client/" rel="nofollow">http://github.com/archiloque/rest-client/</a></p>
|
282
|
-
<p>I **highly** recommend another library authored by Mr. Wiggins that abstracts over the messy file system api, 'rush'. I'm really surprised that more libraries and apps that have a non-trivial amount of file system code don't use it.</p>
|
283
|
-
<p><a href="http://rush.heroku.com/" rel="nofollow">http://rush.heroku.com/</a></p>
|
284
|
-
<p>The code is very clean in both, I tip my hat to you Mr. Wiggins.</p>
|
285
|
-
|
286
|
-
|
287
|
-
</li> <li class="comment odd alt thread-odd thread-alt depth-1" id="comment-40556">
|
288
|
-
<img alt='' src='http://0.gravatar.com/avatar/e61f142f400df8299d37c2bce09e3478?s=64&d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D64&r=G' class='avatar avatar-64 photo' height='64' width='64' /> <cite><a href='http://august.lilleaas.net/' rel='external nofollow' class='url'>August Lilleaas</a> says:</cite><br />
|
289
|
-
|
290
|
-
<small class="commentmetadata">January 19, 2010 at 10:19 am</small>
|
291
|
-
|
292
|
-
|
293
|
-
<p>I always use one of these libraries when I can (rest-client, httparty, ...). In some cases you'd be better off without dependencies, though, such as in small shell scripts, etc.</p>
|
294
|
-
|
295
|
-
|
296
|
-
</li> </ol>
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
</div>
|
301
|
-
|
302
|
-
<h3>Other Posts to Enjoy</h3>
|
303
|
-
<div class="widget_featured-posts noborder four">
|
304
|
-
<ul class="clearfix"><li>
|
305
|
-
|
306
|
-
<a href="http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2012/11/spaced.png&h=73&w=73&zc=1" class="alignleft" alt="The Split is Not Enough: Unicode Whitespace Shenigans for Rubyists" /></a>
|
307
|
-
<h4 class="featured-title"><a href="http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html">The Split is Not Enough: Unicode Whitespace Shenigans for Rubyists</a></h4>
|
308
|
-
</li>
|
309
|
-
<li>
|
310
|
-
|
311
|
-
<a href="http://www.rubyinside.com/mega-february-2012-ruby-news-5815.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2012/03/mega.png&h=73&w=73&zc=1" class="alignleft" alt="The Mega Ruby News and Release Roundup for February 2012" /></a>
|
312
|
-
<h4 class="featured-title"><a href="http://www.rubyinside.com/mega-february-2012-ruby-news-5815.html">The Mega Ruby News and Release Roundup for February 2012</a></h4>
|
313
|
-
</li>
|
314
|
-
<li>
|
315
|
-
|
316
|
-
<a href="http://www.rubyinside.com/sinatra-book-review-5704.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2011/12/sinatra-up.jpeg&h=73&w=73&zc=1" class="alignleft" alt="A Lagom Review of O’Reilly’s ‘Sinatra Up and Running’" /></a>
|
317
|
-
<h4 class="featured-title"><a href="http://www.rubyinside.com/sinatra-book-review-5704.html">A Lagom Review of O’Reilly’s ‘Sinatra Up and Running’</a></h4>
|
318
|
-
</li>
|
319
|
-
<li>
|
320
|
-
|
321
|
-
<a href="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html"><img src="http://www.rubyinside.com/wp-content/plugins/wp-featured-post-with-thumbnail/scripts/timthumb.php?src=http://www.rubyinside.com/wp-content/uploads/2011/11/unary.gif&h=73&w=73&zc=1" class="alignleft" alt="Ruby’s Unary Operators and How to Define Their Functionality" /></a>
|
322
|
-
<h4 class="featured-title"><a href="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html">Ruby’s Unary Operators and How to Define Their Functionality</a></h4>
|
323
|
-
</li>
|
324
|
-
</ul> </div>
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
<h3>Twitter Mentions</h3>
|
331
|
-
<div id="boastful"></div>
|
332
|
-
|
333
|
-
<div class="previousnext">
|
334
|
-
<div class="next"><a href="http://www.rubyinside.com/this-weeks-ruby-news-rspec-2-8-0-rc1-minitest-2-8-0-and-whats-new-in-bundler-1-1-5637.html" rel="next">Next Post »</a></div>
|
335
|
-
<div class="previous"><a href="http://www.rubyinside.com/the-ruby-standard-library-to-be-converted-to-gems-for-ruby-2-0-5586.html" rel="prev">« Previous Post</a></div>
|
336
|
-
</div>
|
337
|
-
|
338
|
-
<!-- <h3>Want to get up to speed with Ruby 1.9?</h3>
|
339
|
-
|
340
|
-
<p><a href="http://www.rubyinside.com/19walkthrough/"><img src="http://www.rubyinside.com/images/19pro.gif" /></a> </p>
|
341
|
-
-->
|
342
|
-
|
343
|
-
|
344
|
-
</div>
|
345
|
-
|
346
|
-
</div>
|
347
|
-
|
348
|
-
</div> <!-- inner -->
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
</div> <!-- container -->
|
354
|
-
|
355
|
-
|
356
|
-
<div id="footer">
|
357
|
-
<div class="inner">
|
358
|
-
<p>Copyright © 2006–2012 <a href="http://twitter.com/peterc">Peter Cooper</a></p>
|
359
|
-
</div>
|
360
|
-
</div>
|
361
|
-
|
362
|
-
<script type="text/javascript" src="http://engine.rubyrow.net/z/1313/adzerk1_4_16_19,adzerk2_4_16_19,adzerk3_4_16_19,adzerk4_4_16_19,adzerk5_4_16_19,adzerk6_4_16_19,adzerk7_4_16_19,adzerk8_4_16_19"></script>
|
363
|
-
<script type="text/javascript" src="http://www.rubyinside.com/wp-content/themes/ri2011/jquery.boastful.js"></script>
|
364
|
-
|
365
|
-
<script type="text/javascript">
|
366
|
-
$(document).ready(function() {
|
367
|
-
$('#boastful').boastful();
|
368
|
-
});
|
369
|
-
</script>
|
370
|
-
|
371
|
-
<script type="text/javascript">
|
372
|
-
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
373
|
-
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
374
|
-
</script>
|
375
|
-
<script type="text/javascript">
|
376
|
-
var pageTracker = _gat._getTracker("UA-2237791-3");
|
377
|
-
pageTracker._initData();
|
378
|
-
pageTracker._trackPageview();
|
379
|
-
</script>
|
380
|
-
|
381
|
-
|
382
|
-
</body>
|
383
|
-
</html>
|
384
|
-
|
385
|
-
<!-- div style="float:right"><a href="http://twitter.com/share" class="twitter-share-button" data-url="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html" data-counturl="http://www.rubyinside.com/rubys-unary-operators-and-how-to-redefine-their-functionality-5610.html" data-text="Ruby’s Unary Operators and How to Define Their Functionality" data-count="horizontal">Tweet</a></div -->
|
386
|
-
<!-- div style="margin-bottom: -12px; margin-top: -10px"><iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Fwww.rubyinside.com%2Frubys-unary-operators-and-how-to-redefine-their-functionality-5610.html&layout=standard&show_faces=false&width=420&action=like&colorscheme=light&height=26" scrolling="no" frameborder="0" style="border:none; overflow:hidden; width:420px; height:26px;" allowTransparency="true"></iframe></div -->
|
387
|
-
<!-- div><script src="http://connect.facebook.net/en_US/all.js#xfbml=1"></script><fb:like href="http://x.com/" show_faces="false" width="450"></fb:like></div -->
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
<!-- Dynamic page generated in 0.233 seconds. -->
|
392
|
-
<!-- Cached page generated by WP-Super-Cache on 2013-01-04 01:31:41 -->
|
393
|
-
<!-- super cache -->
|
@@ -1,36 +0,0 @@
|
|
1
|
-
class Treat::Workers::Extractors::Similarity
|
2
|
-
# Default options.
|
3
|
-
DefaultOptions = {
|
4
|
-
with: '',
|
5
|
-
ins_cost: 1,
|
6
|
-
del_cost: 1,
|
7
|
-
sub_cost: 1
|
8
|
-
}
|
9
|
-
# Return the levensthein distance between
|
10
|
-
# two strings taking into account the costs
|
11
|
-
# of insertion, deletion, and substitution.
|
12
|
-
# Used by did_you_mean? to detect typos.
|
13
|
-
def self.similarity(entity, options)
|
14
|
-
first, other = entity.to_s, options[:with].to_s
|
15
|
-
options = DefaultOptions.merge(options)
|
16
|
-
other, ins, del, sub, = options[:with],
|
17
|
-
options[:inst_cost], options[:del_cost],
|
18
|
-
options[:sub_cost]
|
19
|
-
fill, dm = [0] * (first.length - 1).abs,
|
20
|
-
[(0..first.length).collect { |i| i * ins}]
|
21
|
-
for i in 1..other.length
|
22
|
-
dm[i] = [i * del, fill.flatten]
|
23
|
-
end
|
24
|
-
for i in 1..other.length
|
25
|
-
for j in 1..first.length
|
26
|
-
dm[i][j] = [
|
27
|
-
dm[i-1][j-1] + (first[i-1] ==
|
28
|
-
other[i-1] ? 0 : sub), dm[i][j-1] +
|
29
|
-
ins, dm[i-1][j] + del
|
30
|
-
].min
|
31
|
-
end
|
32
|
-
end
|
33
|
-
dm[other.length][first.length]
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
data/spec/sandbox.rb
DELETED
@@ -1,294 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require_relative '../lib/treat'
|
3
|
-
|
4
|
-
Treat.databases.mongo.db = 'treat_test'
|
5
|
-
Treat.libraries.stanford.model_path =
|
6
|
-
'/ruby/stanford-core-nlp-minimal/models/'
|
7
|
-
Treat.libraries.stanford.jar_path =
|
8
|
-
'/ruby/stanford-core-nlp-minimal/bin/'
|
9
|
-
Treat.libraries.punkt.model_path =
|
10
|
-
'/ruby/punkt/models/'
|
11
|
-
Treat.libraries.reuters.model_path =
|
12
|
-
'/ruby/reuters/models/'
|
13
|
-
|
14
|
-
# include Treat::Core::DSL
|
15
|
-
|
16
|
-
Treat::Builder.new do
|
17
|
-
s = sentence "Hello, world!"
|
18
|
-
s.print_tree
|
19
|
-
end
|
20
|
-
|
21
|
-
p = paragraph('A walk in the park. A trip on a boat.').segment
|
22
|
-
p.visualize :dot, file: 'test.dot'
|
23
|
-
=begin
|
24
|
-
|
25
|
-
g = group("I was running")
|
26
|
-
puts g.tag.inspect
|
27
|
-
|
28
|
-
Treat.libraries.stanford.jar_path = '/ruby/treat/bin/'
|
29
|
-
Treat.libraries.stanford.model_path = '/ruby/treat/models/'
|
30
|
-
|
31
|
-
p = paragraph
|
32
|
-
s = sentence
|
33
|
-
w = word
|
34
|
-
|
35
|
-
p = phrase 'hello world'
|
36
|
-
e = email 'louis@gmail.com'
|
37
|
-
|
38
|
-
d = question(:is_feature, :word)
|
39
|
-
=end
|
40
|
-
#d = document Treat.paths.spec + 'workers/examples/english/economist/hungarys_troubles.txt'
|
41
|
-
#d.apply :chunk, :segment, :tokenize, :tag, :category, :name_tag
|
42
|
-
#d.print_tree
|
43
|
-
#d = document Treat.paths.spec + 'workers/examples/english/economist/saving_the_euro.odt'
|
44
|
-
#d.print_tree
|
45
|
-
=begin
|
46
|
-
d = document 'test.htm'
|
47
|
-
d.apply :chunk
|
48
|
-
#d.serialize :yaml, file: 'test444.yaml'
|
49
|
-
d.set :test, 2
|
50
|
-
d.serialize :mongo, db: 'test'
|
51
|
-
d.set :test, 3
|
52
|
-
d.serialize :mongo, db: 'test'
|
53
|
-
d.apply :segment, :tokenize, :tag, :category
|
54
|
-
puts d.verb_count
|
55
|
-
#d2 = document id: d.id, db: 'test'
|
56
|
-
d2 = document 'features.test' => 3, db: 'test'
|
57
|
-
d2.apply :segment, :tokenize, :tag, :category
|
58
|
-
puts d2.verb_count
|
59
|
-
#d.print_tree
|
60
|
-
#s = document 'http://www.economist.com'
|
61
|
-
|
62
|
-
p = phrase 'hello', 'world', '!'
|
63
|
-
puts p.to_s
|
64
|
-
puts p.to_str
|
65
|
-
=end
|
66
|
-
|
67
|
-
=begin
|
68
|
-
### Super basics.
|
69
|
-
puts p.value
|
70
|
-
|
71
|
-
p << 'bitch'
|
72
|
-
p << word('hello')
|
73
|
-
puts p.to_s
|
74
|
-
puts p.to_str
|
75
|
-
puts p.value
|
76
|
-
puts p.to_ary.inspect
|
77
|
-
=end
|
78
|
-
|
79
|
-
=begin
|
80
|
-
|
81
|
-
### Configuration
|
82
|
-
|
83
|
-
# A boolean value indicating whether to silence the output of external libraries (e.g. Stanford tools, Enju, LDA, Ruby-FANN) when they are used.
|
84
|
-
puts Treat.core.verbosity.silence
|
85
|
-
# A boolean value indicating whether to explain the steps that Treat is performing.
|
86
|
-
puts Treat.core.verbosity.debug
|
87
|
-
# A boolean value indicating whether Treat should try to detect the language of newly input text.
|
88
|
-
puts Treat.core.language.detect
|
89
|
-
# The language to default to when detection is off.
|
90
|
-
puts Treat.core.language.default
|
91
|
-
# A symbol representing the finest level at which language detection should be performed if language detection is turned on.
|
92
|
-
puts Treat.core.language.detect_at
|
93
|
-
|
94
|
-
# A directory in which to create temporary files.
|
95
|
-
puts Treat.paths.tmp
|
96
|
-
# A directory in which to store downloaded files.
|
97
|
-
puts Treat.paths.files
|
98
|
-
# A directory containing trained models for various tasks.
|
99
|
-
puts Treat.paths.models
|
100
|
-
# A directory containing the spec files.
|
101
|
-
puts Treat.paths.spec
|
102
|
-
# A directory containing executables and JAR files.
|
103
|
-
puts Treat.paths.bin
|
104
|
-
puts Treat.paths.lib
|
105
|
-
|
106
|
-
# Set up Mongoid.
|
107
|
-
Treat.databases.mongo.db = 'your_database'
|
108
|
-
Treat.databases.mongo.host = 'localhost'
|
109
|
-
Treat.databases.mongo.port = '27017'
|
110
|
-
|
111
|
-
# Transparent string casting.
|
112
|
-
s = 'inflection'.stem
|
113
|
-
# is equivalent to
|
114
|
-
s = 'inflection'.to_entity.stem
|
115
|
-
# which comes down to
|
116
|
-
s = word('inflection').stem
|
117
|
-
|
118
|
-
# Transparent number casting.
|
119
|
-
n = 2.ordinal
|
120
|
-
# is equivalent to
|
121
|
-
s = 2.to_entity.ordinal
|
122
|
-
# which comes down to
|
123
|
-
s = number(2).ordinal
|
124
|
-
=end
|
125
|
-
=begin
|
126
|
-
### BASIC USAGE
|
127
|
-
|
128
|
-
# Create a sentence
|
129
|
-
s = sentence 'Those who dream by day know of at least ' +
|
130
|
-
'19 things that escape those who dream only at night.'
|
131
|
-
|
132
|
-
# Tokenize and tag it.
|
133
|
-
s.tokenize.tag
|
134
|
-
|
135
|
-
# View the sentence structure.
|
136
|
-
s.print_tree
|
137
|
-
|
138
|
-
# Iterate over the tokens.
|
139
|
-
s.each_token do |tok|
|
140
|
-
puts tok.value
|
141
|
-
puts tok.type
|
142
|
-
end
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
# Arrays instead of iterators.
|
147
|
-
(s.nouns + s.adjectives).each do |word|
|
148
|
-
puts word.synonyms
|
149
|
-
puts word.antonyms
|
150
|
-
end
|
151
|
-
|
152
|
-
# Functions on numbers.
|
153
|
-
s.each_number do |num|
|
154
|
-
puts num.ordinal
|
155
|
-
puts num.cardinal
|
156
|
-
end
|
157
|
-
|
158
|
-
# See all the annotations.
|
159
|
-
s.each do |tok|
|
160
|
-
puts tok.inspect
|
161
|
-
end
|
162
|
-
|
163
|
-
# Lazy way of doing all of the above.
|
164
|
-
s = sentence 'Those who dream by day know of at least ' +
|
165
|
-
'19 things that escape those who dream only at night.'
|
166
|
-
|
167
|
-
s.apply :tokenize, :tag, :category,
|
168
|
-
:stem, :hyponyms, :hypernyms,
|
169
|
-
:antonyms, :ordinal, :cardinal
|
170
|
-
|
171
|
-
=end
|
172
|
-
|
173
|
-
=begin
|
174
|
-
### A BIT MORE ADVANCED USAGE
|
175
|
-
|
176
|
-
section = section "Obama-Sarkozy Meeting\n" +
|
177
|
-
"Obama and Sarkozy met on January 1st to investigate " +
|
178
|
-
"the possibility of a new rescue plan. President " +
|
179
|
-
"Sarkozy is to meet Merkel next Tuesday in Berlin."
|
180
|
-
|
181
|
-
# Chunk: split the titles and paragraphs.
|
182
|
-
# Segment: perform sentence segmentation.
|
183
|
-
# Parse: parse the syntax of each sentence.
|
184
|
-
section.apply :chunk, :segment, :parse
|
185
|
-
|
186
|
-
# View the tree structure.
|
187
|
-
section.print_tree
|
188
|
-
|
189
|
-
# Get some basic info on the text.
|
190
|
-
puts section.title
|
191
|
-
puts section.sentence_count
|
192
|
-
puts section.word_count
|
193
|
-
|
194
|
-
section.apply :category
|
195
|
-
puts section.noun_count
|
196
|
-
puts section.frequency_of 'president'
|
197
|
-
|
198
|
-
section.each_phrase_with_tag('NP') do |phrase|
|
199
|
-
puts phrase.to_s
|
200
|
-
end
|
201
|
-
|
202
|
-
=end
|
203
|
-
=begin
|
204
|
-
### URL documents, XML serialization.
|
205
|
-
|
206
|
-
urls = ['http://www.cbc.ca/news/world/story/2012/11/25/snc-lavalin-ben-aissa-charges.html',
|
207
|
-
'http://www.cbc.ca/news/world/story/2012/11/25/egypt.html', 'http://www.cbc.ca/news/canada/prince-edward-island/story/2012/11/25/pei-murder-arrest-stlucia.html', 'http://www.cbc.ca/news/world/story/2012/11/25/bangladesh-garment-factory-fire.html']
|
208
|
-
|
209
|
-
c = collection
|
210
|
-
urls.each { |url| c << document(url) }
|
211
|
-
|
212
|
-
# View the collection.
|
213
|
-
c.print_tree
|
214
|
-
|
215
|
-
c.apply :chunk, :segment, :tokenize
|
216
|
-
c.serialize :xml, :file => 'test.xml'
|
217
|
-
|
218
|
-
# Reopen the collection.
|
219
|
-
c = collection 'test.xml'
|
220
|
-
|
221
|
-
# View it again.
|
222
|
-
c.print_tree
|
223
|
-
=end
|
224
|
-
=begin
|
225
|
-
include Treat::Core::DSL
|
226
|
-
|
227
|
-
# Show progress bars for download.
|
228
|
-
Treat.core.verbosity.silence = false
|
229
|
-
# Explain what Treat is doing.
|
230
|
-
Treat.core.verbosity.debug = true
|
231
|
-
|
232
|
-
# Define the question "is it junk?" on sentences.
|
233
|
-
qn = question(:is_junk, :sentence)
|
234
|
-
|
235
|
-
# Frame the problem as depending on punctuation
|
236
|
-
# count and word count for each sentence.
|
237
|
-
pb = problem(qn,
|
238
|
-
feature(:punctuation_count),
|
239
|
-
feature(:word_count) )
|
240
|
-
|
241
|
-
# Get some web documents to work on.
|
242
|
-
url1 = 'http://en.wikipedia.org/wiki/NOD_mouse'
|
243
|
-
url2 = 'http://en.wikipedia.org/wiki/Academic_studies_about_Wikipedia'
|
244
|
-
d1, d2 = document(url1), document(url2)
|
245
|
-
|
246
|
-
# Process both of our documents.
|
247
|
-
[d1,d2].apply(:chunk, :segment, :tokenize)
|
248
|
-
|
249
|
-
# Answer our problem to create a training set.
|
250
|
-
d1.sentences[0..17].each { |s| s.set :is_junk, 0 }
|
251
|
-
d1.sentences[17..-1].each { |s| s.set :is_junk, 1 }
|
252
|
-
d_set = d1.export(pb)
|
253
|
-
|
254
|
-
# Define our gold standard results for evaluation.
|
255
|
-
d2.sentences[0..81].each { |s| s.set :is_true_junk, 0 }
|
256
|
-
d2.sentences[81..-1].each { |s| s.set :is_true_junk, 1 }
|
257
|
-
|
258
|
-
tp, fp, tn, fn = 0.0, 0.0, 0.0, 0.0
|
259
|
-
|
260
|
-
d2.sentences.map do |s|
|
261
|
-
pred = s.classify(:id3, training: d_set)
|
262
|
-
if pred == 1
|
263
|
-
tp += 1 if s.is_true_junk == 1
|
264
|
-
fp += 1 if s.is_true_junk == 0
|
265
|
-
else
|
266
|
-
tn += 1 if s.is_true_junk == 0
|
267
|
-
fn += 1 if s.is_true_junk == 1
|
268
|
-
end
|
269
|
-
end
|
270
|
-
|
271
|
-
puts "Precision: #{tp/(tp + fp)}"
|
272
|
-
puts "Recall: #{tp/(tp + fn)}"
|
273
|
-
=end
|
274
|
-
=begin
|
275
|
-
d = document 'http://louismullie.com/susan-text-scan1.jpg'
|
276
|
-
d.apply :chunk, :segment, :tokenize
|
277
|
-
d.print_tree
|
278
|
-
=end
|
279
|
-
=begin
|
280
|
-
# Syntax example
|
281
|
-
phra = phrase 'Obama', 'Sarkozy', 'Meeting'
|
282
|
-
|
283
|
-
para = paragraph 'Obama and Sarkozy met on January 1st to'
|
284
|
-
'investigate the possibility of a new rescue plan. Nicolas ' +
|
285
|
-
'Sarkozy is to meet Merkel next Tuesday in Berlin.'
|
286
|
-
|
287
|
-
sect = section title(phra), para
|
288
|
-
=end
|
289
|
-
=begin
|
290
|
-
puts "beer".plural.inspect
|
291
|
-
=end
|
292
|
-
# Treat.core.language.detect = true
|
293
|
-
# s = sentence "Du hast deiner Frau einen roten Ring gekauft."
|
294
|
-
#s.apply(:parse,:category).print_tree
|