extcite 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +13 -11
- data/README.md +1 -1
- data/doc/Array.html +205 -0
- data/doc/Configuration.html +296 -0
- data/doc/Extcite.html +1122 -0
- data/doc/Hash.html +380 -0
- data/doc/String.html +289 -0
- data/doc/Textminer.html +601 -0
- data/doc/Textminer/Fetch.html +447 -0
- data/doc/Textminer/Mined.html +509 -0
- data/doc/Textminer/Miner.html +385 -0
- data/doc/Textminer/Request.html +669 -0
- data/doc/Textminer/Response.html +923 -0
- data/doc/_index.html +135 -0
- data/doc/class_list.html +51 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +58 -0
- data/doc/css/style.css +496 -0
- data/doc/file.README.html +139 -0
- data/doc/file_list.html +56 -0
- data/doc/frames.html +17 -0
- data/doc/index.html +139 -0
- data/doc/js/app.js +314 -0
- data/doc/js/full_list.js +216 -0
- data/doc/js/jquery.js +4 -0
- data/doc/method_list.html +155 -0
- data/doc/top-level-namespace.html +397 -0
- data/extcite.gemspec +14 -4
- data/lib/extcite.rb +39 -24
- data/lib/extcite/version.rb +1 -1
- metadata +65 -9
data/doc/Textminer.html
ADDED
@@ -0,0 +1,601 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
2
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
4
|
+
<head>
|
5
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
6
|
+
<title>
|
7
|
+
Module: Textminer
|
8
|
+
|
9
|
+
— Documentation by YARD 0.8.7.6
|
10
|
+
|
11
|
+
</title>
|
12
|
+
|
13
|
+
<link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
|
14
|
+
|
15
|
+
<link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
|
16
|
+
|
17
|
+
<script type="text/javascript" charset="utf-8">
|
18
|
+
hasFrames = window.top.frames.main ? true : false;
|
19
|
+
relpath = '';
|
20
|
+
framesUrl = "frames.html#!Textminer.html";
|
21
|
+
</script>
|
22
|
+
|
23
|
+
|
24
|
+
<script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
|
25
|
+
|
26
|
+
<script type="text/javascript" charset="utf-8" src="js/app.js"></script>
|
27
|
+
|
28
|
+
|
29
|
+
</head>
|
30
|
+
<body>
|
31
|
+
<div id="header">
|
32
|
+
<div id="menu">
|
33
|
+
|
34
|
+
<a href="_index.html">Index (T)</a> »
|
35
|
+
|
36
|
+
|
37
|
+
<span class="title">Textminer</span>
|
38
|
+
|
39
|
+
|
40
|
+
<div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
|
41
|
+
</div>
|
42
|
+
|
43
|
+
<div id="search">
|
44
|
+
|
45
|
+
<a class="full_list_link" id="class_list_link"
|
46
|
+
href="class_list.html">
|
47
|
+
Class List
|
48
|
+
</a>
|
49
|
+
|
50
|
+
<a class="full_list_link" id="method_list_link"
|
51
|
+
href="method_list.html">
|
52
|
+
Method List
|
53
|
+
</a>
|
54
|
+
|
55
|
+
<a class="full_list_link" id="file_list_link"
|
56
|
+
href="file_list.html">
|
57
|
+
File List
|
58
|
+
</a>
|
59
|
+
|
60
|
+
</div>
|
61
|
+
<div class="clear"></div>
|
62
|
+
</div>
|
63
|
+
|
64
|
+
<iframe id="search_frame"></iframe>
|
65
|
+
|
66
|
+
<div id="content"><h1>Module: Textminer
|
67
|
+
|
68
|
+
|
69
|
+
|
70
|
+
</h1>
|
71
|
+
|
72
|
+
<dl class="box">
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
<dt class="r1">Extended by:</dt>
|
77
|
+
<dd class="r1"><span class='object_link'><a href="Configuration.html" title="Configuration (module)">Configuration</a></span></dd>
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
<dt class="r2 last">Defined in:</dt>
|
86
|
+
<dd class="r2 last">lib/textminer/mined.rb<span class="defines">,<br />
|
87
|
+
lib/textminer.rb,<br /> lib/textminer/miner.rb,<br /> lib/textminer/request.rb,<br /> lib/textminer/version.rb,<br /> lib/textminer/response.rb</span>
|
88
|
+
</dd>
|
89
|
+
|
90
|
+
</dl>
|
91
|
+
<div class="clear"></div>
|
92
|
+
|
93
|
+
<h2>Overview</h2><div class="docstring">
|
94
|
+
<div class="discussion">
|
95
|
+
|
96
|
+
<p>Textminer::Miner</p>
|
97
|
+
|
98
|
+
<p>Class to give back text mining object</p>
|
99
|
+
|
100
|
+
|
101
|
+
</div>
|
102
|
+
</div>
|
103
|
+
<div class="tags">
|
104
|
+
|
105
|
+
|
106
|
+
</div><h2>Defined Under Namespace</h2>
|
107
|
+
<p class="children">
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
|
112
|
+
<strong class="classes">Classes:</strong> <span class='object_link'><a href="Textminer/Mined.html" title="Textminer::Mined (class)">Mined</a></span>, <span class='object_link'><a href="Textminer/Miner.html" title="Textminer::Miner (class)">Miner</a></span>, <span class='object_link'><a href="Textminer/Request.html" title="Textminer::Request (class)">Request</a></span>, <span class='object_link'><a href="Textminer/Response.html" title="Textminer::Response (class)">Response</a></span>
|
113
|
+
|
114
|
+
|
115
|
+
</p>
|
116
|
+
|
117
|
+
<h2>Constant Summary</h2>
|
118
|
+
|
119
|
+
<dl class="constants">
|
120
|
+
|
121
|
+
<dt id="VERSION-constant" class="">VERSION =
|
122
|
+
|
123
|
+
</dt>
|
124
|
+
<dd><pre class="code"><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>0.1.5</span><span class='tstring_end'>"</span></span></pre></dd>
|
125
|
+
|
126
|
+
</dl>
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
|
136
|
+
<h2>
|
137
|
+
Class Method Summary
|
138
|
+
<small>(<a href="#" class="summary_toggle">collapse</a>)</small>
|
139
|
+
</h2>
|
140
|
+
|
141
|
+
<ul class="summary">
|
142
|
+
|
143
|
+
<li class="public ">
|
144
|
+
<span class="summary_signature">
|
145
|
+
|
146
|
+
<a href="#extract-class_method" title="extract (class method)">+ (Object) <strong>extract</strong>(path) </a>
|
147
|
+
|
148
|
+
|
149
|
+
|
150
|
+
</span>
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
|
160
|
+
<span class="summary_desc"><div class='inline'>
|
161
|
+
<p>Thin layer around pdf-reader gem's PDF::Reader.</p>
|
162
|
+
</div></span>
|
163
|
+
|
164
|
+
</li>
|
165
|
+
|
166
|
+
|
167
|
+
<li class="public ">
|
168
|
+
<span class="summary_signature">
|
169
|
+
|
170
|
+
<a href="#fetch-class_method" title="fetch (class method)">+ (Mined) <strong>fetch</strong>(url) </a>
|
171
|
+
|
172
|
+
|
173
|
+
|
174
|
+
</span>
|
175
|
+
|
176
|
+
|
177
|
+
|
178
|
+
|
179
|
+
|
180
|
+
|
181
|
+
|
182
|
+
|
183
|
+
|
184
|
+
<span class="summary_desc"><div class='inline'>
|
185
|
+
<p>Get full text.</p>
|
186
|
+
</div></span>
|
187
|
+
|
188
|
+
</li>
|
189
|
+
|
190
|
+
|
191
|
+
<li class="public ">
|
192
|
+
<span class="summary_signature">
|
193
|
+
|
194
|
+
<a href="#search-class_method" title="search (class method)">+ (Array) <strong>search</strong>(doi: nil, member: nil, filter: nil, limit: nil, options: nil) </a>
|
195
|
+
|
196
|
+
|
197
|
+
|
198
|
+
</span>
|
199
|
+
|
200
|
+
|
201
|
+
|
202
|
+
|
203
|
+
|
204
|
+
|
205
|
+
|
206
|
+
|
207
|
+
|
208
|
+
<span class="summary_desc"><div class='inline'>
|
209
|
+
<p>Search for papers and get full text links.</p>
|
210
|
+
</div></span>
|
211
|
+
|
212
|
+
</li>
|
213
|
+
|
214
|
+
|
215
|
+
</ul>
|
216
|
+
|
217
|
+
|
218
|
+
|
219
|
+
|
220
|
+
|
221
|
+
|
222
|
+
|
223
|
+
|
224
|
+
|
225
|
+
|
226
|
+
|
227
|
+
<h3 class="inherited">Methods included from <span class='object_link'><a href="Configuration.html" title="Configuration (module)">Configuration</a></span></h3>
|
228
|
+
<p class="inherited"><span class='object_link'><a href="Configuration.html#configuration-instance_method" title="Configuration#configuration (method)">configuration</a></span>, <span class='object_link'><a href="Configuration.html#define_setting-instance_method" title="Configuration#define_setting (method)">define_setting</a></span></p>
|
229
|
+
|
230
|
+
|
231
|
+
<div id="class_method_details" class="method_details_list">
|
232
|
+
<h2>Class Method Details</h2>
|
233
|
+
|
234
|
+
|
235
|
+
<div class="method_details first">
|
236
|
+
<h3 class="signature first" id="extract-class_method">
|
237
|
+
|
238
|
+
+ (<tt>Object</tt>) <strong>extract</strong>(path)
|
239
|
+
|
240
|
+
|
241
|
+
|
242
|
+
|
243
|
+
|
244
|
+
</h3><div class="docstring">
|
245
|
+
<div class="discussion">
|
246
|
+
|
247
|
+
<p>Thin layer around pdf-reader gem's PDF::Reader</p>
|
248
|
+
|
249
|
+
<p>This method is used internally within fetch to parse PDFs.</p>
|
250
|
+
|
251
|
+
|
252
|
+
</div>
|
253
|
+
</div>
|
254
|
+
<div class="tags">
|
255
|
+
|
256
|
+
<div class="examples">
|
257
|
+
<p class="tag_title">Examples:</p>
|
258
|
+
|
259
|
+
|
260
|
+
<pre class="example code"><code><span class='id identifier rubyid_require'>require</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>textminer</span><span class='tstring_end'>'</span></span>
|
261
|
+
<span class='id identifier rubyid_res'>res</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>member:</span> <span class='int'>2258</span><span class='comma'>,</span> <span class='label'>filter:</span> <span class='lbrace'>{</span><span class='label'>has_full_text:</span> <span class='kw'>true</span><span class='rbrace'>}</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
262
|
+
<span class='id identifier rubyid_links'>links</span> <span class='op'>=</span> <span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_links_pdf'>links_pdf</span><span class='lparen'>(</span><span class='kw'>true</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
263
|
+
<span class='comment'># Get full text for an article
|
264
|
+
</span><span class='id identifier rubyid_out'>out</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_fetch'>fetch</span><span class='lparen'>(</span><span class='label'>url:</span> <span class='id identifier rubyid_links'>links</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
265
|
+
<span class='comment'># extract pdf to text
|
266
|
+
</span><span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_extract'>extract</span><span class='lparen'>(</span><span class='id identifier rubyid_out'>out</span><span class='period'>.</span><span class='id identifier rubyid_path'>path</span><span class='rparen'>)</span></code></pre>
|
267
|
+
|
268
|
+
</div>
|
269
|
+
<p class="tag_title">Parameters:</p>
|
270
|
+
<ul class="param">
|
271
|
+
|
272
|
+
<li>
|
273
|
+
|
274
|
+
<span class='name'>path</span>
|
275
|
+
|
276
|
+
|
277
|
+
<span class='type'>(<tt>String</tt>)</span>
|
278
|
+
|
279
|
+
|
280
|
+
|
281
|
+
—
|
282
|
+
<div class='inline'>
|
283
|
+
<p>Path to a pdf file downloaded via <span class='object_link'><a href="#fetch-class_method" title="Textminer.fetch (method)">fetch</a></span>, or another way.</p>
|
284
|
+
</div>
|
285
|
+
|
286
|
+
</li>
|
287
|
+
|
288
|
+
</ul>
|
289
|
+
|
290
|
+
|
291
|
+
</div><table class="source_code">
|
292
|
+
<tr>
|
293
|
+
<td>
|
294
|
+
<pre class="lines">
|
295
|
+
|
296
|
+
|
297
|
+
140
|
298
|
+
141
|
299
|
+
142
|
300
|
+
143</pre>
|
301
|
+
</td>
|
302
|
+
<td>
|
303
|
+
<pre class="code"><span class="info file"># File 'lib/textminer.rb', line 140</span>
|
304
|
+
|
305
|
+
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_extract'>extract</span><span class='lparen'>(</span><span class='id identifier rubyid_path'>path</span><span class='rparen'>)</span>
|
306
|
+
<span class='id identifier rubyid_rr'>rr</span> <span class='op'>=</span> <span class='const'>PDF</span><span class='op'>::</span><span class='const'>Reader</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_path'>path</span><span class='rparen'>)</span>
|
307
|
+
<span class='id identifier rubyid_rr'>rr</span><span class='period'>.</span><span class='id identifier rubyid_pages'>pages</span><span class='period'>.</span><span class='id identifier rubyid_map'>map</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_page'>page</span><span class='op'>|</span> <span class='id identifier rubyid_page'>page</span><span class='period'>.</span><span class='id identifier rubyid_text'>text</span> <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_join'>join</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>\n</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span>
|
308
|
+
<span class='kw'>end</span></pre>
|
309
|
+
</td>
|
310
|
+
</tr>
|
311
|
+
</table>
|
312
|
+
</div>
|
313
|
+
|
314
|
+
<div class="method_details ">
|
315
|
+
<h3 class="signature " id="fetch-class_method">
|
316
|
+
|
317
|
+
+ (<tt><span class='object_link'><a href="Textminer/Mined.html" title="Textminer::Mined (class)">Mined</a></span></tt>) <strong>fetch</strong>(url)
|
318
|
+
|
319
|
+
|
320
|
+
|
321
|
+
|
322
|
+
|
323
|
+
</h3><div class="docstring">
|
324
|
+
<div class="discussion">
|
325
|
+
|
326
|
+
<p>Get full text</p>
|
327
|
+
|
328
|
+
<p>Works easily for open access papers, but not for closed ones. For non-OA papers, use
|
329
|
+
Crossref's Text and Data Mining service, which requires authentication
|
330
|
+
and pre-authorized IP address. Go to <a
|
331
|
+
href="https://apps.crossref.org/clickthrough/researchers">apps.crossref.org/clickthrough/researchers</a>
|
332
|
+
to sign up for the TDM service, to get your key. The only publishers taking
|
333
|
+
part at this time are Elsevier and Wiley.</p>
|
334
|
+
|
335
|
+
<p>The returned object gives the url requested, the file path, and methods for parsing the plain text or XML, or
|
336
|
+
extracting text from the pdf.</p>
|
337
|
+
|
338
|
+
|
339
|
+
</div>
|
340
|
+
</div>
|
341
|
+
<div class="tags">
|
342
|
+
|
343
|
+
<div class="examples">
|
344
|
+
<p class="tag_title">Examples:</p>
|
345
|
+
|
346
|
+
|
347
|
+
<pre class="example code"><code><span class='id identifier rubyid_require'>require</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>textminer</span><span class='tstring_end'>'</span></span>
|
348
|
+
<span class='comment'># Set authorization
|
349
|
+
</span><span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_configuration'>configuration</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_config'>config</span><span class='op'>|</span>
|
350
|
+
<span class='id identifier rubyid_config'>config</span><span class='period'>.</span><span class='id identifier rubyid_tdm_key'>tdm_key</span> <span class='op'>=</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>&lt;your key&gt;</span><span class='tstring_end'>"</span></span>
|
351
|
+
<span class='kw'>end</span>
|
352
|
+
<span class='comment'># Get some elsevier works
|
353
|
+
</span><span class='id identifier rubyid_res'>res</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>member:</span> <span class='int'>78</span><span class='comma'>,</span> <span class='label'>filter:</span> <span class='lbrace'>{</span><span class='label'>has_full_text:</span> <span class='kw'>true</span><span class='rbrace'>}</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
354
|
+
<span class='id identifier rubyid_links'>links</span> <span class='op'>=</span> <span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_links_xml'>links_xml</span><span class='lparen'>(</span><span class='kw'>true</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
355
|
+
<span class='comment'># Get full text for an article
|
356
|
+
</span><span class='id identifier rubyid_out'>out</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_fetch'>fetch</span><span class='lparen'>(</span><span class='label'>url:</span> <span class='id identifier rubyid_links'>links</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
357
|
+
<span class='id identifier rubyid_out'>out</span><span class='period'>.</span><span class='id identifier rubyid_url'>url</span>
|
358
|
+
<span class='id identifier rubyid_out'>out</span><span class='period'>.</span><span class='id identifier rubyid_path'>path</span>
|
359
|
+
<span class='id identifier rubyid_out'>out</span><span class='period'>.</span><span class='id identifier rubyid_type'>type</span>
|
360
|
+
<span class='id identifier rubyid_xml'>xml</span> <span class='op'>=</span> <span class='id identifier rubyid_out'>out</span><span class='period'>.</span><span class='id identifier rubyid_parse'>parse</span><span class='lparen'>(</span><span class='rparen'>)</span>
|
361
|
+
<span class='id identifier rubyid_puts'>puts</span> <span class='id identifier rubyid_xml'>xml</span>
|
362
|
+
<span class='id identifier rubyid_xml'>xml</span><span class='period'>.</span><span class='id identifier rubyid_xpath'>xpath</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>//xocs:cover-date-text</span><span class='tstring_end'>'</span></span><span class='comma'>,</span> <span class='id identifier rubyid_xml'>xml</span><span class='period'>.</span><span class='id identifier rubyid_root'>root</span><span class='period'>.</span><span class='id identifier rubyid_namespaces'>namespaces</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_text'>text</span>
|
363
|
+
<span class='comment'># Get lots of articles
|
364
|
+
</span><span class='id identifier rubyid_links'>links</span> <span class='op'>=</span> <span class='id identifier rubyid_links'>links</span><span class='lbracket'>[</span><span class='int'>1</span><span class='op'>..</span><span class='int'>3</span><span class='rbracket'>]</span>
|
365
|
+
<span class='id identifier rubyid_out'>out</span> <span class='op'>=</span> <span class='id identifier rubyid_links'>links</span><span class='period'>.</span><span class='id identifier rubyid_collect'>collect</span><span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_x'>x</span><span class='op'>|</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_fetch'>fetch</span><span class='lparen'>(</span><span class='label'>url:</span> <span class='id identifier rubyid_x'>x</span><span class='rparen'>)</span> <span class='rbrace'>}</span>
|
366
|
+
<span class='id identifier rubyid_out'>out</span><span class='period'>.</span><span class='id identifier rubyid_collect'>collect</span><span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_z'>z</span><span class='op'>|</span> <span class='id identifier rubyid_z'>z</span><span class='period'>.</span><span class='id identifier rubyid_path'>path</span> <span class='rbrace'>}</span>
|
367
|
+
<span class='id identifier rubyid_out'>out</span><span class='period'>.</span><span class='id identifier rubyid_collect'>collect</span><span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_z'>z</span><span class='op'>|</span> <span class='id identifier rubyid_z'>z</span><span class='period'>.</span><span class='id identifier rubyid_parse'>parse</span> <span class='rbrace'>}</span>
|
368
|
+
<span class='id identifier rubyid_zz'>zz</span> <span class='op'>=</span> <span class='id identifier rubyid_out'>out</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_parse'>parse</span>
|
369
|
+
<span class='id identifier rubyid_zz'>zz</span><span class='period'>.</span><span class='id identifier rubyid_xpath'>xpath</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>//xocs:cover-date-text</span><span class='tstring_end'>'</span></span><span class='comma'>,</span> <span class='id identifier rubyid_zz'>zz</span><span class='period'>.</span><span class='id identifier rubyid_root'>root</span><span class='period'>.</span><span class='id identifier rubyid_namespaces'>namespaces</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_text'>text</span>
|
370
|
+
|
371
|
+
<span class='comment'>## plain text
|
372
|
+
</span><span class='comment'># get full text links, here doing plain text
|
373
|
+
</span><span class='id identifier rubyid_links'>links</span> <span class='op'>=</span> <span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_links_plain'>links_plain</span><span class='lparen'>(</span><span class='kw'>true</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
374
|
+
<span class='comment'># Get full text for an article
|
375
|
+
</span><span class='id identifier rubyid_res'>res</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_fetch'>fetch</span><span class='lparen'>(</span><span class='label'>url:</span> <span class='id identifier rubyid_links'>links</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
376
|
+
<span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_url'>url</span>
|
377
|
+
<span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_parse'>parse</span>
|
378
|
+
|
379
|
+
<span class='comment'># With open access content - using Pensoft
|
380
|
+
</span><span class='id identifier rubyid_res'>res</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>member:</span> <span class='int'>2258</span><span class='comma'>,</span> <span class='label'>filter:</span> <span class='lbrace'>{</span><span class='label'>has_full_text:</span> <span class='kw'>true</span><span class='rbrace'>}</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
381
|
+
<span class='id identifier rubyid_links'>links</span> <span class='op'>=</span> <span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_links_xml'>links_xml</span><span class='lparen'>(</span><span class='kw'>true</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
382
|
+
<span class='comment'># Get full text for an article
|
383
|
+
</span><span class='id identifier rubyid_res'>res</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_fetch'>fetch</span><span class='lparen'>(</span><span class='label'>url:</span> <span class='id identifier rubyid_links'>links</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
384
|
+
<span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_url'>url</span>
|
385
|
+
<span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_parse'>parse</span>
|
386
|
+
|
387
|
+
<span class='comment'># OA content - pdfs, using pensoft again
|
388
|
+
</span><span class='id identifier rubyid_res'>res</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>member:</span> <span class='int'>2258</span><span class='comma'>,</span> <span class='label'>filter:</span> <span class='lbrace'>{</span><span class='label'>has_full_text:</span> <span class='kw'>true</span><span class='rbrace'>}</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
389
|
+
<span class='id identifier rubyid_links'>links</span> <span class='op'>=</span> <span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_links_pdf'>links_pdf</span><span class='lparen'>(</span><span class='kw'>true</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
390
|
+
<span class='comment'># Get full text for an article
|
391
|
+
</span><span class='id identifier rubyid_res'>res</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_fetch'>fetch</span><span class='lparen'>(</span><span class='label'>url:</span> <span class='id identifier rubyid_links'>links</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='rparen'>)</span><span class='semicolon'>;</span>
|
392
|
+
<span class='comment'># url used
|
393
|
+
</span><span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_url'>url</span>
|
394
|
+
<span class='comment'># document type
|
395
|
+
</span><span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_type'>type</span>
|
396
|
+
<span class='comment'># document path on your machine
|
397
|
+
</span><span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_path'>path</span>
|
398
|
+
<span class='comment'># get text
|
399
|
+
</span><span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_parse'>parse</span></code></pre>
|
400
|
+
|
401
|
+
</div>
|
402
|
+
<p class="tag_title">Parameters:</p>
|
403
|
+
<ul class="param">
|
404
|
+
|
405
|
+
<li>
|
406
|
+
|
407
|
+
<span class='name'>url</span>
|
408
|
+
|
409
|
+
|
410
|
+
<span class='type'>(<tt>String</tt>)</span>
|
411
|
+
|
412
|
+
|
413
|
+
|
414
|
+
—
|
415
|
+
<div class='inline'>
|
416
|
+
<p>A url for full text</p>
|
417
|
+
</div>
|
418
|
+
|
419
|
+
</li>
|
420
|
+
|
421
|
+
</ul>
|
422
|
+
|
423
|
+
<p class="tag_title">Returns:</p>
|
424
|
+
<ul class="return">
|
425
|
+
|
426
|
+
<li>
|
427
|
+
|
428
|
+
|
429
|
+
<span class='type'>(<tt><span class='object_link'><a href="Textminer/Mined.html" title="Textminer::Mined (class)">Mined</a></span></tt>)</span>
|
430
|
+
|
431
|
+
|
432
|
+
|
433
|
+
—
|
434
|
+
<div class='inline'>
|
435
|
+
<p>An object of class Mined, with methods for extracting the url requested, the file path, and the parsed text.</p>
|
436
|
+
</div>
|
437
|
+
|
438
|
+
</li>
|
439
|
+
|
440
|
+
</ul>
|
441
|
+
|
442
|
+
</div><table class="source_code">
|
443
|
+
<tr>
|
444
|
+
<td>
|
445
|
+
<pre class="lines">
|
446
|
+
|
447
|
+
|
448
|
+
120
|
449
|
+
121
|
450
|
+
122</pre>
|
451
|
+
</td>
|
452
|
+
<td>
|
453
|
+
<pre class="code"><span class="info file"># File 'lib/textminer.rb', line 120</span>
|
454
|
+
|
455
|
+
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_fetch'>fetch</span><span class='lparen'>(</span><span class='id identifier rubyid_url'>url</span><span class='rparen'>)</span>
|
456
|
+
<span class='const'>Miner</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_url'>url</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_perform'>perform</span>
|
457
|
+
<span class='kw'>end</span></pre>
|
458
|
+
</td>
|
459
|
+
</tr>
|
460
|
+
</table>
|
461
|
+
</div>
|
462
|
+
|
463
|
+
<div class="method_details ">
|
464
|
+
<h3 class="signature " id="search-class_method">
|
465
|
+
|
466
|
+
+ (<tt><span class='object_link'><a href="Array.html" title="Array (class)">Array</a></span></tt>) <strong>search</strong>(doi: nil, member: nil, filter: nil, limit: nil, options: nil)
|
467
|
+
|
468
|
+
|
469
|
+
|
470
|
+
|
471
|
+
|
472
|
+
</h3><div class="docstring">
|
473
|
+
<div class="discussion">
|
474
|
+
|
475
|
+
<p>Search for papers and get full text links</p>
|
476
|
+
|
477
|
+
|
478
|
+
</div>
|
479
|
+
</div>
|
480
|
+
<div class="tags">
|
481
|
+
|
482
|
+
<div class="examples">
|
483
|
+
<p class="tag_title">Examples:</p>
|
484
|
+
|
485
|
+
|
486
|
+
<pre class="example code"><code><span class='id identifier rubyid_require'>require</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>textminer</span><span class='tstring_end'>'</span></span>
|
487
|
+
<span class='comment'># link to full text available
|
488
|
+
</span><span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>doi:</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>10.3897/phytokeys.42.7604</span><span class='tstring_end'>'</span></span><span class='rparen'>)</span>
|
489
|
+
<span class='comment'># no link to full text available
|
490
|
+
</span><span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>doi:</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>10.1371/journal.pone.0000308</span><span class='tstring_end'>"</span></span><span class='rparen'>)</span>
|
491
|
+
<span class='comment'># many DOIs at once
|
492
|
+
</span><span class='id identifier rubyid_require'>require</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>serrano</span><span class='tstring_end'>'</span></span>
|
493
|
+
<span class='id identifier rubyid_dois'>dois</span> <span class='op'>=</span> <span class='const'>Serrano</span><span class='period'>.</span><span class='id identifier rubyid_random_dois'>random_dois</span><span class='lparen'>(</span><span class='label'>sample:</span> <span class='int'>6</span><span class='rparen'>)</span>
|
494
|
+
<span class='id identifier rubyid_res'>res</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>doi:</span> <span class='id identifier rubyid_dois'>dois</span><span class='rparen'>)</span>
|
495
|
+
<span class='id identifier rubyid_res'>res</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>doi:</span> <span class='lbracket'>[</span><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>10.3897/phytokeys.42.7604</span><span class='tstring_end'>"</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>10.3897/zookeys.516.9439</span><span class='tstring_end'>"</span></span><span class='rbracket'>]</span><span class='rparen'>)</span>
|
496
|
+
<span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_links'>links</span>
|
497
|
+
<span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_links_pdf'>links_pdf</span>
|
498
|
+
<span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_links_xml'>links_xml</span>
|
499
|
+
<span class='id identifier rubyid_res'>res</span><span class='period'>.</span><span class='id identifier rubyid_links_plain'>links_plain</span>
|
500
|
+
<span class='comment'># only full text available
|
501
|
+
</span><span class='id identifier rubyid_x'>x</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>doi:</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>10.3816/clm.2001.n.006</span><span class='tstring_end'>'</span></span><span class='rparen'>)</span>
|
502
|
+
<span class='id identifier rubyid_x'>x</span><span class='period'>.</span><span class='id identifier rubyid_links_xml'>links_xml</span>
|
503
|
+
<span class='id identifier rubyid_x'>x</span><span class='period'>.</span><span class='id identifier rubyid_links_plain'>links_plain</span>
|
504
|
+
<span class='id identifier rubyid_x'>x</span><span class='period'>.</span><span class='id identifier rubyid_links_pdf'>links_pdf</span>
|
505
|
+
<span class='comment'># search without DOIs, using only a filter
|
506
|
+
</span><span class='id identifier rubyid_x'>x</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>filter:</span> <span class='lbrace'>{</span><span class='label'>has_full_text:</span> <span class='kw'>true</span><span class='rbrace'>}</span><span class='rparen'>)</span>
|
507
|
+
<span class='id identifier rubyid_x'>x</span><span class='period'>.</span><span class='id identifier rubyid_links_xml'>links_xml</span>
|
508
|
+
<span class='id identifier rubyid_x'>x</span><span class='period'>.</span><span class='id identifier rubyid_links_plain'>links_plain</span>
|
509
|
+
<span class='id identifier rubyid_x'>x</span> <span class='op'>=</span> <span class='const'>Textminer</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>member:</span> <span class='int'>311</span><span class='comma'>,</span> <span class='label'>filter:</span> <span class='lbrace'>{</span><span class='label'>has_full_text:</span> <span class='kw'>true</span><span class='rbrace'>}</span><span class='rparen'>)</span>
|
510
|
+
<span class='id identifier rubyid_x'>x</span><span class='period'>.</span><span class='id identifier rubyid_links_pdf'>links_pdf</span></code></pre>
|
511
|
+
|
512
|
+
</div>
|
513
|
+
<p class="tag_title">Parameters:</p>
|
514
|
+
<ul class="param">
|
515
|
+
|
516
|
+
<li>
|
517
|
+
|
518
|
+
<span class='name'>doi</span>
|
519
|
+
|
520
|
+
|
521
|
+
<span class='type'>(<tt><span class='object_link'><a href="Array.html" title="Array (class)">Array</a></span></tt>)</span>
|
522
|
+
|
523
|
+
|
524
|
+
|
525
|
+
—
|
526
|
+
<div class='inline'>
|
527
|
+
<p>One or more DOIs (digital object identifiers): a single DOI string or an array of DOI strings</p>
|
528
|
+
</div>
|
529
|
+
|
530
|
+
</li>
|
531
|
+
|
532
|
+
<li>
|
533
|
+
|
534
|
+
<span class='name'>options</span>
|
535
|
+
|
536
|
+
|
537
|
+
<span class='type'>(<tt><span class='object_link'><a href="Array.html" title="Array (class)">Array</a></span></tt>)</span>
|
538
|
+
|
539
|
+
|
540
|
+
|
541
|
+
—
|
542
|
+
<div class='inline'>
|
543
|
+
<p>Curl request options</p>
|
544
|
+
</div>
|
545
|
+
|
546
|
+
</li>
|
547
|
+
|
548
|
+
</ul>
|
549
|
+
|
550
|
+
<p class="tag_title">Returns:</p>
|
551
|
+
<ul class="return">
|
552
|
+
|
553
|
+
<li>
|
554
|
+
|
555
|
+
|
556
|
+
<span class='type'>(<tt><span class='object_link'><a href="Array.html" title="Array (class)">Array</a></span></tt>)</span>
|
557
|
+
|
558
|
+
|
559
|
+
|
560
|
+
—
|
561
|
+
<div class='inline'>
|
562
|
+
<p>the search result, which responds to links, links_pdf, links_xml and links_plain (see the examples above)</p>
|
563
|
+
</div>
|
564
|
+
|
565
|
+
</li>
|
566
|
+
|
567
|
+
</ul>
|
568
|
+
|
569
|
+
</div><table class="source_code">
|
570
|
+
<tr>
|
571
|
+
<td>
|
572
|
+
<pre class="lines">
|
573
|
+
|
574
|
+
|
575
|
+
48
|
576
|
+
49
|
577
|
+
50</pre>
|
578
|
+
</td>
|
579
|
+
<td>
|
580
|
+
<pre class="code"><span class="info file"># File 'lib/textminer.rb', line 48</span>
|
581
|
+
|
582
|
+
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='label'>doi:</span> <span class='kw'>nil</span><span class='comma'>,</span> <span class='label'>member:</span> <span class='kw'>nil</span><span class='comma'>,</span> <span class='label'>filter:</span> <span class='kw'>nil</span><span class='comma'>,</span> <span class='label'>limit:</span> <span class='kw'>nil</span><span class='comma'>,</span> <span class='label'>options:</span> <span class='kw'>nil</span><span class='rparen'>)</span>
|
583
|
+
<span class='const'>Request</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_doi'>doi</span><span class='comma'>,</span> <span class='id identifier rubyid_member'>member</span><span class='comma'>,</span> <span class='id identifier rubyid_filter'>filter</span><span class='comma'>,</span> <span class='id identifier rubyid_limit'>limit</span><span class='comma'>,</span> <span class='id identifier rubyid_options'>options</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_perform'>perform</span>
|
584
|
+
<span class='kw'>end</span></pre>
|
585
|
+
</td>
|
586
|
+
</tr>
|
587
|
+
</table>
|
588
|
+
</div>
|
589
|
+
|
590
|
+
</div>
|
591
|
+
|
592
|
+
</div>
|
593
|
+
|
594
|
+
<div id="footer">
|
595
|
+
Generated on Fri Dec 4 20:45:51 2015 by
|
596
|
+
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
597
|
+
0.8.7.6 (ruby-2.2.3).
|
598
|
+
</div>
|
599
|
+
|
600
|
+
</body>
|
601
|
+
</html>
|