immunoscore_results_aggregator 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +47 -0
- data/README.md +54 -0
- data/Rakefile +2 -0
- data/bin/immunoscore_cli.rb +76 -0
- data/config.rb +14 -0
- data/data_struct.rb +280 -0
- data/docs/aggregator.html +200 -0
- data/docs/analyzer.html +1121 -0
- data/docs/data_struct.html +504 -0
- data/docs/database_connection.html +93 -0
- data/docs/docco.css +506 -0
- data/docs/exporter.html +216 -0
- data/docs/immunoscore_results_loader.html +485 -0
- data/docs/public/fonts/aller-bold.eot +0 -0
- data/docs/public/fonts/aller-bold.ttf +0 -0
- data/docs/public/fonts/aller-bold.woff +0 -0
- data/docs/public/fonts/aller-light.eot +0 -0
- data/docs/public/fonts/aller-light.ttf +0 -0
- data/docs/public/fonts/aller-light.woff +0 -0
- data/docs/public/fonts/novecento-bold.eot +0 -0
- data/docs/public/fonts/novecento-bold.ttf +0 -0
- data/docs/public/fonts/novecento-bold.woff +0 -0
- data/docs/public/stylesheets/normalize.css +375 -0
- data/docs/semicolon_cleaner.html +231 -0
- data/immunoscore_results_aggregator.gemspec +32 -0
- data/immunoscore_results_aggregator.rb +9 -0
- data/lib/analyzer.rb +664 -0
- data/lib/data_struct.rb +280 -0
- data/lib/database_connection.rb +4 -0
- data/lib/exporter.rb +76 -0
- data/lib/immunoscore_results_aggregator/version.rb +3 -0
- data/lib/immunoscore_results_loader.rb +218 -0
- data/lib/mongo_aggregator.rb +106 -0
- data/lib/semicolon_cleaner.rb +68 -0
- data/license.txt +21 -0
- metadata +243 -0
data/docs/analyzer.html
ADDED
@@ -0,0 +1,1121 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
|
3
|
+
<html lang="en">
|
4
|
+
<head>
|
5
|
+
<title>analyzer.rb</title>
|
6
|
+
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
7
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
8
|
+
<link rel="stylesheet" media="all" href="docco.css" />
|
9
|
+
</head>
|
10
|
+
<body>
|
11
|
+
<div id="container">
|
12
|
+
<div id="background"></div>
|
13
|
+
|
14
|
+
<ul id="jump_to">
|
15
|
+
<li>
|
16
|
+
<a class="large" href="javascript:void(0);">Jump To …</a>
|
17
|
+
<a class="small" href="javascript:void(0);">+</a>
|
18
|
+
<div id="jump_wrapper">
|
19
|
+
<div id="jump_page">
|
20
|
+
|
21
|
+
|
22
|
+
<a class="source" href="analyzer.html">
|
23
|
+
analyzer.rb
|
24
|
+
</a>
|
25
|
+
|
26
|
+
|
27
|
+
<a class="source" href="cli.html">
|
28
|
+
cli.rb
|
29
|
+
</a>
|
30
|
+
|
31
|
+
|
32
|
+
<a class="source" href="data_struct.html">
|
33
|
+
data_struct.rb
|
34
|
+
</a>
|
35
|
+
|
36
|
+
|
37
|
+
<a class="source" href="database_connection.html">
|
38
|
+
database_connection.rb
|
39
|
+
</a>
|
40
|
+
|
41
|
+
|
42
|
+
<a class="source" href="exporter.html">
|
43
|
+
exporter.rb
|
44
|
+
</a>
|
45
|
+
|
46
|
+
|
47
|
+
<a class="source" href="immunoscore_results_loader.html">
|
48
|
+
immunoscore_results_loader.rb
|
49
|
+
</a>
|
50
|
+
|
51
|
+
|
52
|
+
<a class="source" href="mongo_aggregator.html">
|
53
|
+
mongo_aggregator.rb
|
54
|
+
</a>
|
55
|
+
|
56
|
+
|
57
|
+
<a class="source" href="semicolon_cleaner.html">
|
58
|
+
semicolon_cleaner.rb
|
59
|
+
</a>
|
60
|
+
|
61
|
+
</div>
|
62
|
+
</li>
|
63
|
+
</ul>
|
64
|
+
|
65
|
+
<ul class="sections">
|
66
|
+
|
67
|
+
<li id="title">
|
68
|
+
<div class="annotation">
|
69
|
+
<h1>analyzer.rb</h1>
|
70
|
+
</div>
|
71
|
+
</li>
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
<li id="section-1">
|
76
|
+
<div class="annotation">
|
77
|
+
|
78
|
+
<div class="pilwrap ">
|
79
|
+
<a class="pilcrow" href="#section-1">¶</a>
|
80
|
+
</div>
|
81
|
+
|
82
|
+
</div>
|
83
|
+
|
84
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-keyword">require</span> <span class="hljs-string">'bundler/setup'</span>
|
85
|
+
|
86
|
+
<span class="hljs-keyword">require</span> <span class="hljs-string">"mongo_mapper"</span>
|
87
|
+
<span class="hljs-keyword">require</span> <span class="hljs-string">"csv"</span>
|
88
|
+
<span class="hljs-keyword">require</span> <span class="hljs-string">"gibberish"</span>
|
89
|
+
<span class="hljs-keyword">require</span> <span class="hljs-string">'bicrypt'</span>
|
90
|
+
<span class="hljs-keyword">require</span> <span class="hljs-string">"chronic"</span>
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
require_relative <span class="hljs-string">"database_connection"</span>
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
<span class="hljs-constant">MongoMapper</span>.database = <span class="hljs-constant">DATABASE_NAME</span>
|
99
|
+
|
100
|
+
|
101
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>prompt(*args)
|
102
|
+
print(*args)
|
103
|
+
gets.strip
|
104
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
105
|
+
|
106
|
+
</li>
|
107
|
+
|
108
|
+
|
109
|
+
<li id="section-2">
|
110
|
+
<div class="annotation">
|
111
|
+
|
112
|
+
<div class="pilwrap ">
|
113
|
+
<a class="pilcrow" href="#section-2">¶</a>
|
114
|
+
</div>
|
115
|
+
<h2 id="encryption-and-decryption">Encryption and decryption</h2>
|
116
|
+
<p>Monkey-patches String with encrypt, decrypt and md5 helpers.</p>
|
117
|
+
|
118
|
+
</div>
|
119
|
+
|
120
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">String</span></span></pre></div></div>
|
121
|
+
|
122
|
+
</li>
|
123
|
+
|
124
|
+
|
125
|
+
<li id="section-3">
|
126
|
+
<div class="annotation">
|
127
|
+
|
128
|
+
<div class="pilwrap ">
|
129
|
+
<a class="pilcrow" href="#section-3">¶</a>
|
130
|
+
</div>
|
131
|
+
<h2 id="cypher-p-p">cypher p p</h2>
|
132
|
+
|
133
|
+
</div>
|
134
|
+
|
135
|
+
<div class="content"><div class='highlight'><pre> <span class="hljs-function"><span class="hljs-keyword">def</span> </span><span class="hljs-keyword">self</span>.set_encryption
|
136
|
+
cypher=prompt <span class="hljs-string">"enter encryption cypher: "</span>
|
137
|
+
<span class="hljs-variable">$e</span> = <span class="hljs-constant">BiCrypt</span>.new(cypher)
|
138
|
+
<span class="hljs-keyword">end</span>
|
139
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>decrypt
|
140
|
+
<span class="hljs-keyword">if</span> <span class="hljs-variable">$e</span>==<span class="hljs-keyword">nil</span> <span class="hljs-keyword">then</span> <span class="hljs-constant">String</span>.set_encryption <span class="hljs-keyword">end</span>
|
141
|
+
<span class="hljs-variable">$e</span>.decrypt_string <span class="hljs-constant">Base64</span>.decode64 <span class="hljs-keyword">self</span>.encode(<span class="hljs-string">'ascii-8bit'</span>)
|
142
|
+
<span class="hljs-keyword">end</span>
|
143
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>encrypt
|
144
|
+
<span class="hljs-keyword">if</span> <span class="hljs-variable">$e</span>==<span class="hljs-keyword">nil</span> <span class="hljs-keyword">then</span> <span class="hljs-constant">String</span>.set_encryption <span class="hljs-keyword">end</span>
|
145
|
+
<span class="hljs-constant">Base64</span>.encode64(<span class="hljs-variable">$e</span>.encrypt_string(<span class="hljs-keyword">self</span>)).encode(<span class="hljs-string">'utf-8'</span>)
|
146
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
147
|
+
|
148
|
+
</li>
|
149
|
+
|
150
|
+
|
151
|
+
<li id="section-4">
|
152
|
+
<div class="annotation">
|
153
|
+
|
154
|
+
<div class="pilwrap ">
|
155
|
+
<a class="pilcrow" href="#section-4">¶</a>
|
156
|
+
</div>
|
157
|
+
<p>Some normalization of SS entries is necessary: dashes and surrounding whitespace are stripped before hashing.</p>
|
158
|
+
|
159
|
+
</div>
|
160
|
+
|
161
|
+
<div class="content"><div class='highlight'><pre> <span class="hljs-function"><span class="hljs-keyword">def</span> </span>md5
|
162
|
+
<span class="hljs-constant">Gibberish::MD5</span> (<span class="hljs-keyword">self</span>.gsub <span class="hljs-string">"-"</span>,<span class="hljs-string">""</span>).strip()
|
163
|
+
<span class="hljs-keyword">end</span>
|
164
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
165
|
+
|
166
|
+
</li>
|
167
|
+
|
168
|
+
|
169
|
+
<li id="section-5">
|
170
|
+
<div class="annotation">
|
171
|
+
|
172
|
+
<div class="pilwrap ">
|
173
|
+
<a class="pilcrow" href="#section-5">¶</a>
|
174
|
+
</div>
|
175
|
+
<h2 id="creates-a-mongomapper-class">creates a mongomapper class</h2>
|
176
|
+
|
177
|
+
</div>
|
178
|
+
|
179
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>make_mongo_class class_name
|
180
|
+
<span class="hljs-keyword">self</span>.instance_variable_set <span class="hljs-string">"@<span class="hljs-subst">#{class_name}</span>"</span>, <span class="hljs-constant">Class</span>.new
|
181
|
+
c=<span class="hljs-keyword">self</span>.instance_variable_get <span class="hljs-string">"@<span class="hljs-subst">#{class_name}</span>"</span>
|
182
|
+
c.class_eval <span class="hljs-keyword">do</span>
|
183
|
+
<span class="hljs-keyword">include</span> <span class="hljs-constant">MongoMapper::Document</span>
|
184
|
+
<span class="hljs-keyword">end</span>
|
185
|
+
c
|
186
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
187
|
+
|
188
|
+
</li>
|
189
|
+
|
190
|
+
|
191
|
+
<li id="section-6">
|
192
|
+
<div class="annotation">
|
193
|
+
|
194
|
+
<div class="pilwrap ">
|
195
|
+
<a class="pilcrow" href="#section-6">¶</a>
|
196
|
+
</div>
|
197
|
+
<h2 id="utility-for-extrapolating-mongotype">Utility for extrapolating MongoType</h2>
|
198
|
+
<p>TODO: date types still need to be handled here.</p>
|
199
|
+
|
200
|
+
</div>
|
201
|
+
|
202
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">String</span></span>
|
203
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>correct
|
204
|
+
<span class="hljs-keyword">case</span> <span class="hljs-keyword">self</span>
|
205
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"String"</span>
|
206
|
+
<span class="hljs-string">"String"</span>
|
207
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"Fixnum"</span>
|
208
|
+
<span class="hljs-string">"Integer"</span>
|
209
|
+
<span class="hljs-keyword">end</span>
|
210
|
+
<span class="hljs-keyword">end</span>
|
211
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
212
|
+
|
213
|
+
</li>
|
214
|
+
|
215
|
+
|
216
|
+
<li id="section-7">
|
217
|
+
<div class="annotation">
|
218
|
+
|
219
|
+
<div class="pilwrap ">
|
220
|
+
<a class="pilcrow" href="#section-7">¶</a>
|
221
|
+
</div>
|
222
|
+
<h2 id="array-of-rows-to-excel-file">Array of rows to CSV table</h2>
|
223
|
+
|
224
|
+
</div>
|
225
|
+
|
226
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">Array</span></span>
|
227
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>to_table
|
228
|
+
t=<span class="hljs-constant">Tempfile</span>.new(<span class="hljs-string">"foo"</span>)
|
229
|
+
<span class="hljs-keyword">self</span>.each <span class="hljs-keyword">do</span> |row|
|
230
|
+
t.write row.to_csv
|
231
|
+
<span class="hljs-keyword">end</span>
|
232
|
+
t.close
|
233
|
+
puts t.path
|
234
|
+
new_table=<span class="hljs-constant">CSV</span>.table t.path
|
235
|
+
`rm <span class="hljs-subst">#{t.path}</span>`
|
236
|
+
new_table
|
237
|
+
<span class="hljs-keyword">end</span>
|
238
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
239
|
+
|
240
|
+
</li>
|
241
|
+
|
242
|
+
|
243
|
+
<li id="section-8">
|
244
|
+
<div class="annotation">
|
245
|
+
|
246
|
+
<div class="pilwrap ">
|
247
|
+
<a class="pilcrow" href="#section-8">¶</a>
|
248
|
+
</div>
|
249
|
+
<h2 id="splits-name-in-array-components">Splits name in array components</h2>
|
250
|
+
<p>always returns array</p>
|
251
|
+
|
252
|
+
</div>
|
253
|
+
|
254
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>name_split text_string
|
255
|
+
<span class="hljs-keyword">if</span> text_string.index <span class="hljs-string">" "</span> <span class="hljs-keyword">or</span> text_string.index <span class="hljs-string">","</span>
|
256
|
+
<span class="hljs-keyword">return</span> (text_string.split(<span class="hljs-string">" "</span>).split(<span class="hljs-string">","</span>) ).flatten
|
257
|
+
<span class="hljs-keyword">else</span>
|
258
|
+
<span class="hljs-keyword">return</span> [text_string].flatten
|
259
|
+
<span class="hljs-keyword">end</span>
|
260
|
+
|
261
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
262
|
+
|
263
|
+
</li>
|
264
|
+
|
265
|
+
|
266
|
+
<li id="section-9">
|
267
|
+
<div class="annotation">
|
268
|
+
|
269
|
+
<div class="pilwrap ">
|
270
|
+
<a class="pilcrow" href="#section-9">¶</a>
|
271
|
+
</div>
|
272
|
+
<h2 id="removes-names-from-surg-path-text">Removes names from surg path text</h2>
|
273
|
+
|
274
|
+
</div>
|
275
|
+
|
276
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>names_cleaner text, names
|
277
|
+
|
278
|
+
names.map!{|z| split_if_space z}.flatten! <span class="hljs-keyword">if</span> names.<span class="hljs-keyword">class</span>==<span class="hljs-constant">Array</span>
|
279
|
+
names=name_split names <span class="hljs-keyword">if</span> names.<span class="hljs-keyword">class</span>==<span class="hljs-constant">String</span>
|
280
|
+
names.each <span class="hljs-keyword">do</span> |n|
|
281
|
+
|
282
|
+
r=<span class="hljs-constant">Regexp</span>.new(n, <span class="hljs-constant">Regexp::IGNORECASE</span>)
|
283
|
+
text.gsub! r,<span class="hljs-string">""</span>
|
284
|
+
puts <span class="hljs-string">"cleaned <span class="hljs-subst">#{n}</span>"</span>
|
285
|
+
<span class="hljs-keyword">end</span>
|
286
|
+
text
|
287
|
+
<span class="hljs-keyword">end</span>
|
288
|
+
|
289
|
+
|
290
|
+
<span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">CSV::Row</span></span>
|
291
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>pp
|
292
|
+
<span class="hljs-keyword">self</span>.to_hash
|
293
|
+
<span class="hljs-keyword">end</span>
|
294
|
+
|
295
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
296
|
+
|
297
|
+
</li>
|
298
|
+
|
299
|
+
|
300
|
+
<li id="section-10">
|
301
|
+
<div class="annotation">
|
302
|
+
|
303
|
+
<div class="pilwrap ">
|
304
|
+
<a class="pilcrow" href="#section-10">¶</a>
|
305
|
+
</div>
|
306
|
+
<h2 id="a-modified-csv-table-class">A modified CSV table class</h2>
|
307
|
+
<p>Can Decrypt and encrypt</p>
|
308
|
+
|
309
|
+
</div>
|
310
|
+
|
311
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">CSV::Table</span></span>
|
312
|
+
<span class="hljs-keyword">attr_accessor</span> <span class="hljs-symbol">:file_path</span>, <span class="hljs-symbol">:data_classifier</span>
|
313
|
+
|
314
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>encrypt col_name
|
315
|
+
<span class="hljs-keyword">if</span> <span class="hljs-variable">$e</span>==<span class="hljs-keyword">nil</span> <span class="hljs-keyword">then</span> <span class="hljs-constant">String</span>.set_encryption <span class="hljs-keyword">end</span>
|
316
|
+
<span class="hljs-keyword">self</span>.each <span class="hljs-keyword">do</span> |row|
|
317
|
+
row[col_name]=row[col_name].to_s.encrypt
|
318
|
+
<span class="hljs-keyword">end</span>
|
319
|
+
<span class="hljs-keyword">end</span>
|
320
|
+
|
321
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>encrypt_col_names col_names_array
|
322
|
+
col_names_array.each <span class="hljs-keyword">do</span> |col_name|
|
323
|
+
<span class="hljs-keyword">self</span>.encrypt col_name
|
324
|
+
<span class="hljs-keyword">end</span>
|
325
|
+
<span class="hljs-keyword">end</span>
|
326
|
+
|
327
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>decrypt col_name
|
328
|
+
<span class="hljs-keyword">if</span> <span class="hljs-variable">$e</span>==<span class="hljs-keyword">nil</span> <span class="hljs-keyword">then</span> <span class="hljs-constant">String</span>.set_encryption <span class="hljs-keyword">end</span>
|
329
|
+
<span class="hljs-keyword">self</span>.each <span class="hljs-keyword">do</span> |row|
|
330
|
+
row[col_name]=row[col_name].to_s.decrypt
|
331
|
+
<span class="hljs-keyword">end</span>
|
332
|
+
<span class="hljs-keyword">end</span>
|
333
|
+
|
334
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>clean_names names_col, diagnosis_col
|
335
|
+
<span class="hljs-keyword">self</span>.each <span class="hljs-keyword">do</span> |row|
|
336
|
+
row[diagnosis_col]=names_cleaner row[diagnosis_col],row[names_col]
|
337
|
+
<span class="hljs-keyword">end</span>
|
338
|
+
<span class="hljs-keyword">end</span>
|
339
|
+
|
340
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>md5 col_name
|
341
|
+
<span class="hljs-keyword">self</span>.each <span class="hljs-keyword">do</span> |row|
|
342
|
+
row[col_name]=row[col_name].to_s.md5
|
343
|
+
<span class="hljs-keyword">end</span>
|
344
|
+
<span class="hljs-keyword">end</span>
|
345
|
+
|
346
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>save file_name
|
347
|
+
<span class="hljs-constant">CSV</span>.open(file_name, <span class="hljs-string">"wb"</span>) <span class="hljs-keyword">do</span> |csv|
|
348
|
+
csv &lt;&lt; <span class="hljs-keyword">self</span>.headers
|
349
|
+
<span class="hljs-keyword">self</span>.each <span class="hljs-keyword">do</span> |line|
|
350
|
+
csv &lt;&lt; line.fields
|
351
|
+
<span class="hljs-keyword">end</span>
|
352
|
+
<span class="hljs-keyword">end</span>
|
353
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
354
|
+
|
355
|
+
</li>
|
356
|
+
|
357
|
+
|
358
|
+
<li id="section-11">
|
359
|
+
<div class="annotation">
|
360
|
+
|
361
|
+
<div class="pilwrap ">
|
362
|
+
<a class="pilcrow" href="#section-11">¶</a>
|
363
|
+
</div>
|
364
|
+
<p>Example usage: <code>z.find_rows :diagnosis_text, /.(T\d)./i</code></p>
|
365
|
+
|
366
|
+
</div>
|
367
|
+
|
368
|
+
<div class="content"><div class='highlight'><pre> <span class="hljs-function"><span class="hljs-keyword">def</span> </span>find_rows col_name, regex, decrypt=<span class="hljs-keyword">false</span>
|
369
|
+
new_table=[]&lt;&lt;<span class="hljs-keyword">self</span>.headers
|
370
|
+
<span class="hljs-keyword">if</span> decrypt <span class="hljs-keyword">then</span> <span class="hljs-keyword">self</span>.decrypt col_name <span class="hljs-keyword">end</span>
|
371
|
+
<span class="hljs-keyword">self</span>[col_name].each_with_index <span class="hljs-keyword">do</span> |r,i|</pre></div></div>
|
372
|
+
|
373
|
+
</li>
|
374
|
+
|
375
|
+
|
376
|
+
<li id="section-12">
|
377
|
+
<div class="annotation">
|
378
|
+
|
379
|
+
<div class="pilwrap ">
|
380
|
+
<a class="pilcrow" href="#section-12">¶</a>
|
381
|
+
</div>
|
382
|
+
<p>puts r</p>
|
383
|
+
|
384
|
+
</div>
|
385
|
+
|
386
|
+
<div class="content"><div class='highlight'><pre> <span class="hljs-keyword">if</span> r.match regex
|
387
|
+
puts i
|
388
|
+
new_table&lt;&lt;<span class="hljs-keyword">self</span>[i]
|
389
|
+
<span class="hljs-keyword">end</span>
|
390
|
+
<span class="hljs-keyword">end</span>
|
391
|
+
new_table.to_table
|
392
|
+
<span class="hljs-keyword">end</span>
|
393
|
+
|
394
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>to_mongo mongo_class
|
395
|
+
<span class="hljs-keyword">self</span>.each_with_index <span class="hljs-keyword">do</span> |row,i|
|
396
|
+
m=mongo_class.new
|
397
|
+
<span class="hljs-keyword">self</span>.headers.each <span class="hljs-keyword">do</span> |header|
|
398
|
+
puts <span class="hljs-string">"working on <span class="hljs-subst">#{header}</span> in row <span class="hljs-subst">#{i}</span>"</span>
|
399
|
+
m[header]=row[header]
|
400
|
+
<span class="hljs-keyword">end</span>
|
401
|
+
m.save
|
402
|
+
puts <span class="hljs-string">"<span class="hljs-subst">#{i}</span>: <span class="hljs-subst">#{mongo_class.count}</span>"</span>
|
403
|
+
<span class="hljs-keyword">end</span>
|
404
|
+
<span class="hljs-keyword">end</span>
|
405
|
+
|
406
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>mongo_code
|
407
|
+
<span class="hljs-keyword">self</span>.data_classifier.print_class
|
408
|
+
<span class="hljs-keyword">end</span>
|
409
|
+
<span class="hljs-keyword">alias</span> <span class="hljs-symbol">:print_mongo</span> <span class="hljs-symbol">:mongo_code</span>
|
410
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
411
|
+
|
412
|
+
</li>
|
413
|
+
|
414
|
+
|
415
|
+
<li id="section-13">
|
416
|
+
<div class="annotation">
|
417
|
+
|
418
|
+
<div class="pilwrap ">
|
419
|
+
<a class="pilcrow" href="#section-13">¶</a>
|
420
|
+
</div>
|
421
|
+
<h2 id="check-if-a-file-is-an-idiotic-csv">Check if a file is a semicolon-separated CSV</h2>
|
422
|
+
<p>by counting ; and , in header line</p>
|
423
|
+
|
424
|
+
</div>
|
425
|
+
|
426
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>is_semicolon? file_path
|
427
|
+
csv=<span class="hljs-constant">CSV</span>.read(file_path)
|
428
|
+
<span class="hljs-keyword">if</span> (csv <span class="hljs-keyword">and</span> csv.count !=<span class="hljs-number">0</span>)
|
429
|
+
puts file_path
|
430
|
+
header=csv[<span class="hljs-number">0</span>][<span class="hljs-number">0</span>]
|
431
|
+
<span class="hljs-keyword">if</span> header==<span class="hljs-keyword">nil</span> <span class="hljs-keyword">then</span> <span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span> <span class="hljs-keyword">end</span>
|
432
|
+
<span class="hljs-keyword">if</span> (header <span class="hljs-keyword">and</span> header.count(<span class="hljs-string">";"</span>))>header.count(<span class="hljs-string">","</span>)
|
433
|
+
<span class="hljs-keyword">return</span> <span class="hljs-keyword">true</span>
|
434
|
+
<span class="hljs-keyword">else</span>
|
435
|
+
<span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span>
|
436
|
+
<span class="hljs-keyword">end</span>
|
437
|
+
<span class="hljs-keyword">end</span>
|
438
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
439
|
+
|
440
|
+
</li>
|
441
|
+
|
442
|
+
|
443
|
+
<li id="section-14">
|
444
|
+
<div class="annotation">
|
445
|
+
|
446
|
+
<div class="pilwrap ">
|
447
|
+
<a class="pilcrow" href="#section-14">¶</a>
|
448
|
+
</div>
|
449
|
+
<h2 id="check-if-a-tab-file">Check if a file is tab-separated</h2>
|
450
|
+
<p>by counting \t and , in header line</p>
|
451
|
+
|
452
|
+
</div>
|
453
|
+
|
454
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>is_tab? file_path
|
455
|
+
csv=<span class="hljs-constant">CSV</span>.read(file_path)
|
456
|
+
<span class="hljs-keyword">if</span> (csv <span class="hljs-keyword">and</span> csv.count !=<span class="hljs-number">0</span>)
|
457
|
+
puts file_path
|
458
|
+
header=csv[<span class="hljs-number">0</span>][<span class="hljs-number">0</span>]
|
459
|
+
<span class="hljs-keyword">if</span> header==<span class="hljs-keyword">nil</span> <span class="hljs-keyword">then</span> <span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span> <span class="hljs-keyword">end</span>
|
460
|
+
<span class="hljs-keyword">if</span> (header <span class="hljs-keyword">and</span> header.count(<span class="hljs-string">"\t"</span>))>header.count(<span class="hljs-string">","</span>)
|
461
|
+
<span class="hljs-keyword">return</span> <span class="hljs-keyword">true</span>
|
462
|
+
<span class="hljs-keyword">else</span>
|
463
|
+
<span class="hljs-keyword">return</span> <span class="hljs-keyword">false</span>
|
464
|
+
<span class="hljs-keyword">end</span>
|
465
|
+
<span class="hljs-keyword">end</span>
|
466
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
467
|
+
|
468
|
+
</li>
|
469
|
+
|
470
|
+
|
471
|
+
<li id="section-15">
|
472
|
+
<div class="annotation">
|
473
|
+
|
474
|
+
<div class="pilwrap ">
|
475
|
+
<a class="pilcrow" href="#section-15">¶</a>
|
476
|
+
</div>
|
477
|
+
<h2 id="remove-semicolons">Remove semicolons</h2>
|
478
|
+
<p>; => ,</p>
|
479
|
+
|
480
|
+
</div>
|
481
|
+
|
482
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>remove_semicolon file_path
|
483
|
+
puts <span class="hljs-string">"removing semicolons in <span class="hljs-subst">#{file_path}</span>"</span>
|
484
|
+
c=<span class="hljs-constant">CSV</span>.table file_path, <span class="hljs-symbol">:col_sep=></span> <span class="hljs-string">";"</span>
|
485
|
+
fh=<span class="hljs-constant">File</span>.new file_path, <span class="hljs-string">"w"</span>
|
486
|
+
fh.write c.to_csv
|
487
|
+
fh.close
|
488
|
+
<span class="hljs-keyword">return</span> file_path
|
489
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
490
|
+
|
491
|
+
</li>
|
492
|
+
|
493
|
+
|
494
|
+
<li id="section-16">
|
495
|
+
<div class="annotation">
|
496
|
+
|
497
|
+
<div class="pilwrap ">
|
498
|
+
<a class="pilcrow" href="#section-16">¶</a>
|
499
|
+
</div>
|
500
|
+
<h2 id="remove-tabs">Remove tabs</h2>
|
501
|
+
<p>\t => ,</p>
|
502
|
+
|
503
|
+
</div>
|
504
|
+
|
505
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>remove_tabs file_path
|
506
|
+
puts <span class="hljs-string">"removing tabs in <span class="hljs-subst">#{file_path}</span>"</span>
|
507
|
+
c=<span class="hljs-constant">CSV</span>.table file_path, <span class="hljs-symbol">:col_sep=></span> <span class="hljs-string">"\t"</span>
|
508
|
+
fh=<span class="hljs-constant">File</span>.new file_path, <span class="hljs-string">"w"</span>
|
509
|
+
fh.write c.to_csv
|
510
|
+
fh.close
|
511
|
+
ext=<span class="hljs-constant">File</span>.extname file_path
|
512
|
+
<span class="hljs-keyword">if</span> ext==<span class="hljs-string">".xls"</span>
|
513
|
+
`cp <span class="hljs-subst">#{file_path}</span> <span class="hljs-subst">#{file_path.gsub ext,".csv"}</span>`
|
514
|
+
<span class="hljs-keyword">return</span> file_path.gsub ext,<span class="hljs-string">".csv"</span>
|
515
|
+
<span class="hljs-keyword">else</span>
|
516
|
+
<span class="hljs-keyword">return</span> file_path
|
517
|
+
<span class="hljs-keyword">end</span>
|
518
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
519
|
+
|
520
|
+
</li>
|
521
|
+
|
522
|
+
|
523
|
+
<li id="section-17">
|
524
|
+
<div class="annotation">
|
525
|
+
|
526
|
+
<div class="pilwrap ">
|
527
|
+
<a class="pilcrow" href="#section-17">¶</a>
|
528
|
+
</div>
|
529
|
+
<h2 id="a-modified-csv-table-class-1">A modified CSV table class</h2>
|
530
|
+
<p>Adds removal of duplicate rows</p>
|
531
|
+
|
532
|
+
</div>
|
533
|
+
|
534
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">CSV::Table</span></span>
|
535
|
+
<span class="hljs-keyword">attr_accessor</span> <span class="hljs-symbol">:file_path</span>, <span class="hljs-symbol">:data_classifier</span></pre></div></div>
|
536
|
+
|
537
|
+
</li>
|
538
|
+
|
539
|
+
|
540
|
+
<li id="section-18">
|
541
|
+
<div class="annotation">
|
542
|
+
|
543
|
+
<div class="pilwrap ">
|
544
|
+
<a class="pilcrow" href="#section-18">¶</a>
|
545
|
+
</div>
|
546
|
+
<p>remove duplicates</p>
|
547
|
+
<p>Takes the column to search for duplicate entries.</p>
|
548
|
+
|
549
|
+
</div>
|
550
|
+
|
551
|
+
<div class="content"><div class='highlight'><pre> <span class="hljs-function"><span class="hljs-keyword">def</span> </span>remove_duplicate col
|
552
|
+
col_all=<span class="hljs-keyword">self</span>[col]
|
553
|
+
<span class="hljs-keyword">self</span>.each <span class="hljs-keyword">do</span> |row|
|
554
|
+
entry=row[col]
|
555
|
+
<span class="hljs-keyword">if</span> col_all.count(entry) >= <span class="hljs-number">2</span></pre></div></div>
|
556
|
+
|
557
|
+
</li>
|
558
|
+
|
559
|
+
|
560
|
+
<li id="section-19">
|
561
|
+
<div class="annotation">
|
562
|
+
|
563
|
+
<div class="pilwrap ">
|
564
|
+
<a class="pilcrow" href="#section-19">¶</a>
|
565
|
+
</div>
|
566
|
+
<p>deletes entries from index and from table</p>
|
567
|
+
|
568
|
+
</div>
|
569
|
+
|
570
|
+
<div class="content"><div class='highlight'><pre> <span class="hljs-keyword">self</span>.delete col_all.rindex(entry)
|
571
|
+
col_all.delete_at col_all.rindex(entry)
|
572
|
+
<span class="hljs-keyword">end</span>
|
573
|
+
<span class="hljs-keyword">end</span>
|
574
|
+
<span class="hljs-keyword">end</span>
|
575
|
+
<span class="hljs-keyword">end</span>
|
576
|
+
|
577
|
+
|
578
|
+
|
579
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>make_class_name file_path
|
580
|
+
<span class="hljs-constant">File</span>.basename((file_path).gsub(<span class="hljs-string">"."</span>,<span class="hljs-string">"_"</span>).gsub(<span class="hljs-string">"@"</span>,<span class="hljs-string">""</span>).gsub(<span class="hljs-string">"%"</span>,<span class="hljs-string">""</span>).gsub(<span class="hljs-string">"-"</span>,<span class="hljs-string">""</span>)).camelize
|
581
|
+
|
582
|
+
<span class="hljs-keyword">end</span>
|
583
|
+
|
584
|
+
|
585
|
+
<span class="hljs-class"><span class="hljs-keyword">module</span> <span class="hljs-title">StringToMongo</span></span>
|
586
|
+
|
587
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span><span class="hljs-keyword">self</span>.<span class="hljs-keyword">nil</span>? text_string
|
588
|
+
<span class="hljs-keyword">if</span> text_string==<span class="hljs-keyword">nil</span>
|
589
|
+
<span class="hljs-keyword">true</span>
|
590
|
+
<span class="hljs-keyword">else</span>
|
591
|
+
<span class="hljs-keyword">false</span>
|
592
|
+
<span class="hljs-keyword">end</span>
|
593
|
+
<span class="hljs-keyword">end</span>
|
594
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span><span class="hljs-keyword">self</span>.integer? text_string
|
595
|
+
<span class="hljs-keyword">if</span> text_string.match /^\d*<span class="hljs-variable">$/</span>
|
596
|
+
<span class="hljs-keyword">true</span>
|
597
|
+
<span class="hljs-keyword">else</span>
|
598
|
+
<span class="hljs-keyword">false</span>
|
599
|
+
<span class="hljs-keyword">end</span>
|
600
|
+
<span class="hljs-keyword">end</span>
|
601
|
+
|
602
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span><span class="hljs-keyword">self</span>.float? text_string
|
603
|
+
<span class="hljs-keyword">if</span> text_string.match /^\d*\.\d*<span class="hljs-variable">$/</span>
|
604
|
+
<span class="hljs-keyword">true</span>
|
605
|
+
<span class="hljs-keyword">else</span>
|
606
|
+
<span class="hljs-keyword">false</span>
|
607
|
+
<span class="hljs-keyword">end</span>
|
608
|
+
<span class="hljs-keyword">end</span>
|
609
|
+
|
610
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span><span class="hljs-keyword">self</span>.date? text_string
|
611
|
+
<span class="hljs-keyword">if</span> <span class="hljs-constant">Chronic</span>.parse(text_string) != <span class="hljs-keyword">nil</span>
|
612
|
+
<span class="hljs-keyword">true</span>
|
613
|
+
<span class="hljs-keyword">else</span>
|
614
|
+
<span class="hljs-keyword">false</span>
|
615
|
+
<span class="hljs-keyword">end</span>
|
616
|
+
<span class="hljs-keyword">end</span>
|
617
|
+
|
618
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span><span class="hljs-keyword">self</span>.mongo_type text_string
|
619
|
+
<span class="hljs-keyword">case</span>
|
620
|
+
<span class="hljs-keyword">when</span> <span class="hljs-keyword">nil</span>?(text_string)
|
621
|
+
<span class="hljs-string">"String"</span>
|
622
|
+
<span class="hljs-keyword">when</span> integer?(text_string)
|
623
|
+
<span class="hljs-string">"Integer"</span>
|
624
|
+
<span class="hljs-keyword">when</span> float?(text_string)
|
625
|
+
<span class="hljs-string">"Float"</span>
|
626
|
+
<span class="hljs-keyword">when</span> date?(text_string)
|
627
|
+
<span class="hljs-string">"Time"</span>
|
628
|
+
<span class="hljs-keyword">else</span>
|
629
|
+
<span class="hljs-string">"String"</span>
|
630
|
+
|
631
|
+
<span class="hljs-keyword">end</span>
|
632
|
+
<span class="hljs-keyword">end</span>
|
633
|
+
<span class="hljs-keyword">end</span>
|
634
|
+
|
635
|
+
<span class="hljs-class"><span class="hljs-keyword">module</span> <span class="hljs-title">ColName</span></span>
|
636
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span><span class="hljs-keyword">self</span>.fix_spaces col_name
|
637
|
+
col_name.gsub(<span class="hljs-string">"."</span>,<span class="hljs-string">"_"</span>).gsub(<span class="hljs-string">" "</span>,<span class="hljs-string">"_"</span>)
|
638
|
+
<span class="hljs-keyword">end</span>
|
639
|
+
|
640
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span><span class="hljs-keyword">self</span>.fix_numbers col_name
|
641
|
+
matches=(col_name.scan /(\d)/)
|
642
|
+
<span class="hljs-keyword">if</span> matches==[]
|
643
|
+
puts <span class="hljs-string">"no match"</span>
|
644
|
+
<span class="hljs-keyword">return</span> col_name
|
645
|
+
<span class="hljs-keyword">else</span>
|
646
|
+
n=matches[<span class="hljs-number">0</span>]
|
647
|
+
puts <span class="hljs-string">"captures <span class="hljs-subst">#{n}</span> <span class="hljs-subst">#{n.<span class="hljs-keyword">class</span>}</span>"</span>
|
648
|
+
<span class="hljs-keyword">case</span> n
|
649
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"1"</span>
|
650
|
+
puts n
|
651
|
+
col_name.gsub!(n,<span class="hljs-string">"one"</span>)
|
652
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"2"</span>
|
653
|
+
puts n
|
654
|
+
col_name.gsub!(n,<span class="hljs-string">"two"</span>)
|
655
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"3"</span>
|
656
|
+
puts n
|
657
|
+
col_name.gsub!(n,<span class="hljs-string">"three"</span>)
|
658
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"4"</span>
|
659
|
+
puts n
|
660
|
+
col_name.gsub!(n,<span class="hljs-string">"four"</span>)
|
661
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"5"</span>
|
662
|
+
puts n
|
663
|
+
col_name.gsub!(n,<span class="hljs-string">"five"</span>)
|
664
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"6"</span>
|
665
|
+
puts n
|
666
|
+
col_name.gsub!(n,<span class="hljs-string">"six"</span>)
|
667
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"7"</span>
|
668
|
+
puts n
|
669
|
+
col_name.gsub!(n,<span class="hljs-string">"seven"</span>)
|
670
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"8"</span>
|
671
|
+
puts n
|
672
|
+
col_name.gsub!(n,<span class="hljs-string">"eight"</span>)
|
673
|
+
<span class="hljs-keyword">when</span> <span class="hljs-string">"9"</span>
|
674
|
+
puts n
|
675
|
+
col_name.gsub!(n,<span class="hljs-string">"nine"</span>)
|
676
|
+
<span class="hljs-keyword">end</span>
|
677
|
+
<span class="hljs-keyword">end</span>
|
678
|
+
col_name
|
679
|
+
<span class="hljs-keyword">end</span>
|
680
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
681
|
+
|
682
|
+
</li>
|
683
|
+
|
684
|
+
|
685
|
+
<li id="section-20">
|
686
|
+
<div class="annotation">
|
687
|
+
|
688
|
+
<div class="pilwrap ">
|
689
|
+
<a class="pilcrow" href="#section-20">¶</a>
|
690
|
+
</div>
|
691
|
+
<h2 id="creates-a-mongo-class-mapping-csv-file">Creates a Mongo Class mapping csv file</h2>
|
692
|
+
<p>d=DataClassifier.new "/Users/carlobifulco/Dropbox/code/next_gen/hotspot2.csv"
|
693
|
+
d.print_class</p>
|
694
|
+
|
695
|
+
</div>
|
696
|
+
|
697
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">DataClassifier</span></span>
|
698
|
+
<span class="hljs-keyword">attr_accessor</span> <span class="hljs-symbol">:headers</span>, <span class="hljs-symbol">:fs_line</span>,<span class="hljs-symbol">:keys_types</span>,<span class="hljs-symbol">:file_name</span>,<span class="hljs-symbol">:template</span>
|
699
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>initialize file_name=<span class="hljs-string">"test.csv"</span>
|
700
|
+
|
701
|
+
<span class="hljs-variable">@header_zip</span>=<span class="hljs-keyword">self</span>.cheap_headers file_name
|
702
|
+
|
703
|
+
<span class="hljs-variable">@file_name</span>=file_name
|
704
|
+
<span class="hljs-variable">@class_name</span>=make_class_name file_name</pre></div></div>
|
705
|
+
|
706
|
+
</li>
|
707
|
+
|
708
|
+
|
709
|
+
<li id="section-21">
|
710
|
+
<div class="annotation">
|
711
|
+
|
712
|
+
<div class="pilwrap ">
|
713
|
+
<a class="pilcrow" href="#section-21">¶</a>
|
714
|
+
</div>
|
715
|
+
<p>self.instance_variable_set "@#{@class_name}",make_mongo_class(@class_name)</p>
|
716
|
+
|
717
|
+
</div>
|
718
|
+
|
719
|
+
<div class="content"><div class='highlight'><pre>
|
720
|
+
<span class="hljs-variable">@template</span>=<span class="hljs-string">""</span><span class="hljs-string">"
|
721
|
+
class <span class="hljs-subst">#{<span class="hljs-variable">@class_name</span>}</span>
|
722
|
+
include MongoMapper::Document
|
723
|
+
include DataUtilities
|
724
|
+
safe
|
725
|
+
timestamps!
|
726
|
+
"</span><span class="hljs-string">""</span>
|
727
|
+
<span class="hljs-keyword">end</span>
|
728
|
+
|
729
|
+
|
730
|
+
|
731
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>cheap_headers file_path
|
732
|
+
index=<span class="hljs-number">0</span>
|
733
|
+
container=[]
|
734
|
+
<span class="hljs-constant">CSV</span>.foreach(file_path) <span class="hljs-keyword">do</span> |row|
|
735
|
+
container<<row</pre></div></div>
|
736
|
+
|
737
|
+
</li>
|
738
|
+
|
739
|
+
|
740
|
+
<li id="section-22">
|
741
|
+
<div class="annotation">
|
742
|
+
|
743
|
+
<div class="pilwrap ">
|
744
|
+
<a class="pilcrow" href="#section-22">¶</a>
|
745
|
+
</div>
|
746
|
+
<p>puts row</p>
|
747
|
+
|
748
|
+
</div>
|
749
|
+
|
750
|
+
<div class="content"><div class='highlight'><pre> index+=<span class="hljs-number">1</span>
|
751
|
+
<span class="hljs-keyword">if</span> index><span class="hljs-number">2</span> <span class="hljs-keyword">then</span> <span class="hljs-keyword">break</span> <span class="hljs-keyword">end</span>
|
752
|
+
<span class="hljs-keyword">end</span>
|
753
|
+
container[<span class="hljs-number">0</span>]=container[<span class="hljs-number">0</span>].map{|x| x.gsub(<span class="hljs-string">"."</span>,<span class="hljs-string">"_"</span>).downcase}
|
754
|
+
zipped=container[<span class="hljs-number">0</span>].zip container[<span class="hljs-number">1</span>]
|
755
|
+
zipped.select {|x| x[<span class="hljs-number">0</span>]!=<span class="hljs-string">""</span> <span class="hljs-keyword">and</span> x[<span class="hljs-number">0</span>]!=<span class="hljs-keyword">nil</span>}
|
756
|
+
<span class="hljs-keyword">end</span>
|
757
|
+
|
758
|
+
|
759
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>mongo_types
|
760
|
+
<span class="hljs-variable">@header_zip</span>.each_with_object({}) <span class="hljs-keyword">do</span> |hz,container|
|
761
|
+
key=hz[<span class="hljs-number">0</span>].gsub(<span class="hljs-string">" "</span>,<span class="hljs-string">"_"</span>)
|
762
|
+
container[hz[<span class="hljs-number">0</span>].to_sym]=<span class="hljs-constant">StringToMongo</span>.mongo_type hz[<span class="hljs-number">1</span>]
|
763
|
+
<span class="hljs-keyword">end</span>
|
764
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
765
|
+
|
766
|
+
</li>
|
767
|
+
|
768
|
+
|
769
|
+
<li id="section-23">
|
770
|
+
<div class="annotation">
|
771
|
+
|
772
|
+
<div class="pilwrap ">
|
773
|
+
<a class="pilcrow" href="#section-23">¶</a>
|
774
|
+
</div>
|
775
|
+
<p>prints the class</p>
|
776
|
+
|
777
|
+
</div>
|
778
|
+
|
779
|
+
<div class="content"><div class='highlight'><pre> <span class="hljs-function"><span class="hljs-keyword">def</span> </span>print_class
|
780
|
+
puts template
|
781
|
+
<span class="hljs-keyword">self</span>.mongo_types.sort_by{|k,v| k}.each <span class="hljs-keyword">do</span> |r|
|
782
|
+
puts <span class="hljs-string">" key :<span class="hljs-subst">#{r[<span class="hljs-number">0</span>]}</span>, <span class="hljs-subst">#{r[<span class="hljs-number">1</span>]}</span> "</span>
|
783
|
+
<span class="hljs-keyword">end</span>
|
784
|
+
puts <span class="hljs-string">"end"</span>
|
785
|
+
<span class="hljs-keyword">end</span>
|
786
|
+
|
787
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
788
|
+
|
789
|
+
</li>
|
790
|
+
|
791
|
+
|
792
|
+
<li id="section-24">
|
793
|
+
<div class="annotation">
|
794
|
+
|
795
|
+
<div class="pilwrap ">
|
796
|
+
<a class="pilcrow" href="#section-24">¶</a>
|
797
|
+
</div>
|
798
|
+
<h2 id="remove-empty-columns-from-csv-files">remove empty columns from CSV files</h2>
|
799
|
+
<p>also deals with some Windows encoding issues if needed</p>
|
800
|
+
|
801
|
+
</div>
|
802
|
+
|
803
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>remove_nil_headers file_path
|
804
|
+
<span class="hljs-keyword">begin</span>
|
805
|
+
c=<span class="hljs-constant">CSV</span>.read(file_path,<span class="hljs-symbol">:headers</span> => <span class="hljs-keyword">true</span>)
|
806
|
+
<span class="hljs-keyword">rescue</span>
|
807
|
+
c=<span class="hljs-constant">CSV</span>.read(<span class="hljs-variable">@file_name</span>,<span class="hljs-symbol">:headers</span> => <span class="hljs-keyword">true</span>, <span class="hljs-symbol">:encoding</span> => <span class="hljs-string">'windows-1251:utf-8'</span>)
|
808
|
+
<span class="hljs-keyword">end</span>
|
809
|
+
<span class="hljs-keyword">if</span> c.headers.<span class="hljs-keyword">include</span>? <span class="hljs-keyword">nil</span>
|
810
|
+
c.by_col!
|
811
|
+
<span class="hljs-keyword">while</span> c.headers.index(<span class="hljs-keyword">nil</span>) != <span class="hljs-keyword">nil</span>
|
812
|
+
c.delete(c.headers.index(<span class="hljs-keyword">nil</span>))
|
813
|
+
<span class="hljs-keyword">end</span>
|
814
|
+
fh=<span class="hljs-constant">File</span>.new file_path, <span class="hljs-string">"w"</span>
|
815
|
+
fh.write c.to_csv
|
816
|
+
fh.close
|
817
|
+
<span class="hljs-keyword">end</span>
|
818
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
819
|
+
|
820
|
+
</li>
|
821
|
+
|
822
|
+
|
823
|
+
<li id="section-25">
|
824
|
+
<div class="annotation">
|
825
|
+
|
826
|
+
<div class="pilwrap ">
|
827
|
+
<a class="pilcrow" href="#section-25">¶</a>
|
828
|
+
</div>
|
829
|
+
<h2 id="factory-for-new-tables">Factory for new tables</h2>
|
830
|
+
<p>takes care of definiens and also stores file paths
|
831
|
+
also removes nil headers
|
832
|
+
also deals with tab formatted files</p>
|
833
|
+
|
834
|
+
</div>
|
835
|
+
|
836
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>load_table file_path
|
837
|
+
<span class="hljs-keyword">if</span> is_semicolon?(file_path)
|
838
|
+
file_path=remove_semicolon(file_path)
|
839
|
+
<span class="hljs-keyword">elsif</span> is_tab?(file_path)
|
840
|
+
file_path=remove_tabs(file_path)
|
841
|
+
<span class="hljs-keyword">end</span>
|
842
|
+
remove_nil_headers file_path
|
843
|
+
c=<span class="hljs-constant">CSV</span>.table file_path
|
844
|
+
c.file_path=file_path
|
845
|
+
<span class="hljs-keyword">begin</span>
|
846
|
+
c.data_classifier=<span class="hljs-constant">DataClassifier</span>.new file_path
|
847
|
+
<span class="hljs-keyword">rescue</span>
|
848
|
+
c.data_classifier=<span class="hljs-keyword">false</span>
|
849
|
+
<span class="hljs-keyword">end</span>
|
850
|
+
c
|
851
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
852
|
+
|
853
|
+
</li>
|
854
|
+
|
855
|
+
|
856
|
+
<li id="section-26">
|
857
|
+
<div class="annotation">
|
858
|
+
|
859
|
+
<div class="pilwrap ">
|
860
|
+
<a class="pilcrow" href="#section-26">¶</a>
|
861
|
+
</div>
|
862
|
+
<h2 id="mongoloader">MongoLoader</h2>
|
863
|
+
<p>takes a class and a csv file and then loads it into Mongo
|
864
|
+
def mongo_loader mongo_class, file_path
|
865
|
+
counter=0
|
866
|
+
CSV.foreach(file_path) do |row|</p>
|
867
|
+
<pre><code><span class="hljs-comment">#puts row</span>
|
868
|
+
puts counter
|
869
|
+
<span class="hljs-keyword">if</span> counter==<span class="hljs-number">0</span>
|
870
|
+
<span class="hljs-variable">@headers</span>=row
|
871
|
+
counter+=<span class="hljs-number">1</span>
|
872
|
+
<span class="hljs-keyword">next</span>
|
873
|
+
<span class="hljs-keyword">end</span>
|
874
|
+
puts counter
|
875
|
+
m=mongo_class.new
|
876
|
+
row.each_with_index <span class="hljs-keyword">do</span> |e,i|
|
877
|
+
</code></pre>
|
878
|
+
</div>
|
879
|
+
|
880
|
+
</li>
|
881
|
+
|
882
|
+
|
883
|
+
<li id="section-27">
|
884
|
+
<div class="annotation">
|
885
|
+
|
886
|
+
<div class="pilwrap ">
|
887
|
+
<a class="pilcrow" href="#section-27">¶</a>
|
888
|
+
</div>
|
889
|
+
<pre><code> <span class="hljs-comment">#puts "HEADERS: #{<span class="hljs-yardoctag">@headers</span>}"</span>
|
890
|
+
<span class="hljs-comment">#puts <span class="hljs-yardoctag">@headers</span>[i]</span>
|
891
|
+
m[<span class="hljs-variable">@headers</span>[i].gsub(<span class="hljs-string">"."</span>,<span class="hljs-string">"_"</span>).downcase]=e
|
892
|
+
m.save
|
893
|
+
</code></pre>
|
894
|
+
</div>
|
895
|
+
|
896
|
+
</li>
|
897
|
+
|
898
|
+
|
899
|
+
<li id="section-28">
|
900
|
+
<div class="annotation">
|
901
|
+
|
902
|
+
<div class="pilwrap ">
|
903
|
+
<a class="pilcrow" href="#section-28">¶</a>
|
904
|
+
</div>
|
905
|
+
<pre><code><span class="hljs-keyword">end</span>
|
906
|
+
puts counter
|
907
|
+
counter+=<span class="hljs-number">1</span>
|
908
|
+
</code></pre><p> end
|
909
|
+
“”
|
910
|
+
end</p>
|
911
|
+
|
912
|
+
</div>
|
913
|
+
|
914
|
+
</li>
|
915
|
+
|
916
|
+
|
917
|
+
<li id="section-29">
|
918
|
+
<div class="annotation">
|
919
|
+
|
920
|
+
<div class="pilwrap ">
|
921
|
+
<a class="pilcrow" href="#section-29">¶</a>
|
922
|
+
</div>
|
923
|
+
<h2 id="load-csv-file-into-mongo-class">Load CSV file into mongo class</h2>
|
924
|
+
<p>Mongo class needs to exist</p>
|
925
|
+
|
926
|
+
</div>
|
927
|
+
|
928
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>csv_to_mongo file_name=<span class="hljs-string">"test.csv"</span>,mongo_class=<span class="hljs-constant">TestCsv</span>
|
929
|
+
t=<span class="hljs-constant">CSV</span>.table file_name
|
930
|
+
t.each_with_index <span class="hljs-keyword">do</span> |row,i|
|
931
|
+
m=mongo_class.new
|
932
|
+
t.headers.each <span class="hljs-keyword">do</span> |header|
|
933
|
+
m[header]=row[header]
|
934
|
+
<span class="hljs-keyword">end</span>
|
935
|
+
m.save
|
936
|
+
puts <span class="hljs-string">"<span class="hljs-subst">#{i}</span>: <span class="hljs-subst">#{mongo_class.count}</span>"</span>
|
937
|
+
<span class="hljs-keyword">end</span>
|
938
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
939
|
+
|
940
|
+
</li>
|
941
|
+
|
942
|
+
|
943
|
+
<li id="section-30">
|
944
|
+
<div class="annotation">
|
945
|
+
|
946
|
+
<div class="pilwrap ">
|
947
|
+
<a class="pilcrow" href="#section-30">¶</a>
|
948
|
+
</div>
|
949
|
+
<h2 id="convinience-for-mongomapper-classes">Convinience for Mongomapper classes</h2>
|
950
|
+
<p>export to csv</p>
|
951
|
+
<p>pretty printing of keys</p>
|
952
|
+
|
953
|
+
</div>
|
954
|
+
|
955
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-class"><span class="hljs-keyword">module</span> <span class="hljs-title">DataUtilities</span></span></pre></div></div>
|
956
|
+
|
957
|
+
</li>
|
958
|
+
|
959
|
+
|
960
|
+
<li id="section-31">
|
961
|
+
<div class="annotation">
|
962
|
+
|
963
|
+
<div class="pilwrap ">
|
964
|
+
<a class="pilcrow" href="#section-31">¶</a>
|
965
|
+
</div>
|
966
|
+
<h2 id="export-csv">Export csv</h2>
|
967
|
+
<p>file_path - the file to be exported to</p>
|
968
|
+
|
969
|
+
</div>
|
970
|
+
|
971
|
+
<div class="content"><div class='highlight'><pre> <span class="hljs-function"><span class="hljs-keyword">def</span> </span>export file_path
|
972
|
+
<span class="hljs-constant">CSV</span>.open(file_path, <span class="hljs-string">"wb"</span>) <span class="hljs-keyword">do</span> |csv|
|
973
|
+
headers=<span class="hljs-keyword">self</span>.<span class="hljs-keyword">class</span>.keys.keys.sort
|
974
|
+
puts headers
|
975
|
+
csv << headers
|
976
|
+
<span class="hljs-keyword">self</span>.<span class="hljs-keyword">class</span>.all.each <span class="hljs-keyword">do</span> |c|
|
977
|
+
line=[]
|
978
|
+
headers.each <span class="hljs-keyword">do</span> |h|
|
979
|
+
line<<(c[h]).to_s
|
980
|
+
<span class="hljs-keyword">end</span>
|
981
|
+
csv << line
|
982
|
+
<span class="hljs-keyword">end</span>
|
983
|
+
puts csv
|
984
|
+
<span class="hljs-keyword">end</span>
|
985
|
+
<span class="hljs-keyword">end</span>
|
986
|
+
|
987
|
+
|
988
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>pp
|
989
|
+
<span class="hljs-keyword">self</span>.keys.keys.sort.each <span class="hljs-keyword">do</span> |k|
|
990
|
+
puts <span class="hljs-string">"<span class="hljs-subst">#{k}</span>: <span class="hljs-subst">#{<span class="hljs-keyword">self</span>[k]}</span>"</span>
|
991
|
+
<span class="hljs-keyword">end</span>
|
992
|
+
<span class="hljs-keyword">nil</span>
|
993
|
+
<span class="hljs-keyword">end</span>
|
994
|
+
|
995
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>pp_to_s
|
996
|
+
text=[]
|
997
|
+
<span class="hljs-keyword">self</span>.keys.keys.sort.each <span class="hljs-keyword">do</span> |k|
|
998
|
+
text<< <span class="hljs-string">" <span class="hljs-subst">#{k}</span>: <span class="hljs-subst">#{<span class="hljs-keyword">self</span>[k]}</span>;"</span>
|
999
|
+
<span class="hljs-keyword">end</span>
|
1000
|
+
text.join <span class="hljs-string">""</span>
|
1001
|
+
<span class="hljs-keyword">end</span>
|
1002
|
+
|
1003
|
+
<span class="hljs-keyword">end</span>
|
1004
|
+
|
1005
|
+
|
1006
|
+
<span class="hljs-class"><span class="hljs-keyword">module</span> <span class="hljs-title">ClassDataUtilities</span></span>
|
1007
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>export file_path
|
1008
|
+
<span class="hljs-constant">CSV</span>.open(file_path, <span class="hljs-string">"wb"</span>) <span class="hljs-keyword">do</span> |csv|
|
1009
|
+
headers=<span class="hljs-keyword">self</span>.keys.keys.sort
|
1010
|
+
puts headers
|
1011
|
+
csv << headers
|
1012
|
+
<span class="hljs-keyword">self</span>.all.each <span class="hljs-keyword">do</span> |c|
|
1013
|
+
line=[]
|
1014
|
+
headers.each <span class="hljs-keyword">do</span> |h|
|
1015
|
+
line<<(c[h]).to_s
|
1016
|
+
<span class="hljs-keyword">end</span>
|
1017
|
+
csv << line
|
1018
|
+
<span class="hljs-keyword">end</span>
|
1019
|
+
puts csv
|
1020
|
+
<span class="hljs-keyword">end</span>
|
1021
|
+
<span class="hljs-keyword">end</span>
|
1022
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
1023
|
+
|
1024
|
+
</li>
|
1025
|
+
|
1026
|
+
|
1027
|
+
<li id="section-32">
|
1028
|
+
<div class="annotation">
|
1029
|
+
|
1030
|
+
<div class="pilwrap ">
|
1031
|
+
<a class="pilcrow" href="#section-32">¶</a>
|
1032
|
+
</div>
|
1033
|
+
<h2 id="utility-function-to-create-mongomapper-keys">Utility function to create mongomapper keys</h2>
|
1034
|
+
<p>takes the file_path of the csv file</p>
|
1035
|
+
<p>prints keys in mongomapper format</p>
|
1036
|
+
|
1037
|
+
</div>
|
1038
|
+
|
1039
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-function"><span class="hljs-keyword">def</span> </span>csv_headers_to_keys file_path
|
1040
|
+
<span class="hljs-constant">CSV</span>.table(file_path).headers.sort!.each <span class="hljs-keyword">do</span> |x|
|
1041
|
+
puts <span class="hljs-string">"key :<span class="hljs-subst">#{x}</span>, String"</span>
|
1042
|
+
<span class="hljs-keyword">end</span>
|
1043
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
1044
|
+
|
1045
|
+
</li>
|
1046
|
+
|
1047
|
+
|
1048
|
+
<li id="section-33">
|
1049
|
+
<div class="annotation">
|
1050
|
+
|
1051
|
+
<div class="pilwrap ">
|
1052
|
+
<a class="pilcrow" href="#section-33">¶</a>
|
1053
|
+
</div>
|
1054
|
+
<h2 id="utility-for-exporting-a-mongomapper-search">Utility for exporting a mongomapper search</h2>
|
1055
|
+
<p>takes the name of the file to which all entries will be saved in csv format</p>
|
1056
|
+
<p>saves csv file</p>
|
1057
|
+
|
1058
|
+
</div>
|
1059
|
+
|
1060
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">Array</span></span>
|
1061
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>mongo_to_csv file_path
|
1062
|
+
<span class="hljs-constant">CSV</span>.open(file_path, <span class="hljs-string">"wb"</span>) <span class="hljs-keyword">do</span> |csv|
|
1063
|
+
headers=<span class="hljs-keyword">self</span>[<span class="hljs-number">0</span>].<span class="hljs-keyword">class</span>.keys.keys.sort
|
1064
|
+
puts headers
|
1065
|
+
csv << headers
|
1066
|
+
<span class="hljs-keyword">self</span>.each <span class="hljs-keyword">do</span> |c|
|
1067
|
+
line=[]
|
1068
|
+
headers.each <span class="hljs-keyword">do</span> |h|
|
1069
|
+
line<<(c[h]).to_s
|
1070
|
+
<span class="hljs-keyword">end</span>
|
1071
|
+
csv << line
|
1072
|
+
<span class="hljs-keyword">end</span>
|
1073
|
+
puts csv
|
1074
|
+
<span class="hljs-keyword">end</span>
|
1075
|
+
<span class="hljs-keyword">end</span>
|
1076
|
+
|
1077
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>mongo_to_table
|
1078
|
+
file_path=<span class="hljs-constant">Tempfile</span>.new <span class="hljs-string">"test"</span>
|
1079
|
+
<span class="hljs-keyword">self</span>.mongo_to_csv file_path
|
1080
|
+
r=<span class="hljs-constant">CSV</span>.table file_path
|
1081
|
+
`rm <span class="hljs-comment">#{file_path}`</span>
|
1082
|
+
r
|
1083
|
+
<span class="hljs-keyword">end</span>
|
1084
|
+
|
1085
|
+
<span class="hljs-keyword">end</span>
|
1086
|
+
|
1087
|
+
|
1088
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>val_to_csv val_name, file_name
|
1089
|
+
<span class="hljs-constant">Case</span>.find_all_by_validation_name(val_name).mongo_to_csv(file_name)
|
1090
|
+
puts <span class="hljs-string">"saved <span class="hljs-subst">#{<span class="hljs-constant">Case</span>.find_all_by_validation_name(val_name)}</span> in <span class="hljs-subst">#{<span class="hljs-constant">File</span>.absolute_path file_name}</span>"</span>
|
1091
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
1092
|
+
|
1093
|
+
</li>
|
1094
|
+
|
1095
|
+
|
1096
|
+
<li id="section-34">
|
1097
|
+
<div class="annotation">
|
1098
|
+
|
1099
|
+
<div class="pilwrap ">
|
1100
|
+
<a class="pilcrow" href="#section-34">¶</a>
|
1101
|
+
</div>
|
1102
|
+
<h2 id="a-modified-csv-table-class">A modified CSV table class</h2>
|
1103
|
+
<p>pp - prints the headers and then every row in csv format</p>
|
1104
|
+
|
1105
|
+
</div>
|
1106
|
+
|
1107
|
+
<div class="content"><div class='highlight'><pre><span class="hljs-class"><span class="hljs-keyword">class</span> <span class="hljs-title">CSV::Table</span></span>
|
1108
|
+
<span class="hljs-function"><span class="hljs-keyword">def</span> </span>pp
|
1109
|
+
puts <span class="hljs-keyword">self</span>.headers.to_csv
|
1110
|
+
<span class="hljs-keyword">self</span>.each <span class="hljs-keyword">do</span> |r|
|
1111
|
+
puts r.to_csv
|
1112
|
+
<span class="hljs-keyword">end</span>
|
1113
|
+
<span class="hljs-keyword">end</span>
|
1114
|
+
<span class="hljs-keyword">end</span></pre></div></div>
|
1115
|
+
|
1116
|
+
</li>
|
1117
|
+
|
1118
|
+
</ul>
|
1119
|
+
</div>
|
1120
|
+
</body>
|
1121
|
+
</html>
|