genevalidator 1.6.1 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,994 +0,0 @@
1
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
- <head>
5
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
6
- <title>
7
- Class: HierarchicalClusterization
8
-
9
- &mdash; Documentation by YARD 0.8.7.2
10
-
11
- </title>
12
-
13
- <link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
14
-
15
- <link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
16
-
17
- <script type="text/javascript" charset="utf-8">
18
- hasFrames = window.top.frames.main ? true : false;
19
- relpath = '';
20
- framesUrl = "frames.html#!" + escape(window.location.href);
21
- </script>
22
-
23
-
24
- <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
25
-
26
- <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
27
-
28
-
29
- </head>
30
- <body>
31
- <div id="header">
32
- <div id="menu">
33
-
34
- <a href="_index.html">Index (H)</a> &raquo;
35
-
36
-
37
- <span class="title">HierarchicalClusterization</span>
38
-
39
-
40
- <div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
41
- </div>
42
-
43
- <div id="search">
44
-
45
- <a class="full_list_link" id="class_list_link"
46
- href="class_list.html">
47
- Class List
48
- </a>
49
-
50
- <a class="full_list_link" id="method_list_link"
51
- href="method_list.html">
52
- Method List
53
- </a>
54
-
55
- <a class="full_list_link" id="file_list_link"
56
- href="file_list.html">
57
- File List
58
- </a>
59
-
60
- </div>
61
- <div class="clear"></div>
62
- </div>
63
-
64
- <iframe id="search_frame"></iframe>
65
-
66
- <div id="content"><h1>Class: HierarchicalClusterization
67
-
68
-
69
-
70
- </h1>
71
-
72
- <dl class="box">
73
-
74
- <dt class="r1">Inherits:</dt>
75
- <dd class="r1">
76
- <span class="inheritName">Object</span>
77
-
78
- <ul class="fullTree">
79
- <li>Object</li>
80
-
81
- <li class="next">HierarchicalClusterization</li>
82
-
83
- </ul>
84
- <a href="#" class="inheritanceTree">show all</a>
85
-
86
- </dd>
87
-
88
-
89
-
90
-
91
-
92
-
93
-
94
-
95
-
96
- <dt class="r2 last">Defined in:</dt>
97
- <dd class="r2 last">lib/genevalidator/clusterization.rb</dd>
98
-
99
- </dl>
100
- <div class="clear"></div>
101
-
102
-
103
-
104
-
105
-
106
- <h2>Instance Attribute Summary <small>(<a href="#" class="summary_toggle">collapse</a>)</small></h2>
107
- <ul class="summary">
108
-
109
- <li class="public ">
110
- <span class="summary_signature">
111
-
112
- <a href="#clusters-instance_method" title="#clusters (instance method)">- (Object) <strong>clusters</strong> </a>
113
-
114
-
115
-
116
- </span>
117
-
118
-
119
-
120
-
121
-
122
-
123
-
124
-
125
-
126
-
127
-
128
-
129
- <span class="summary_desc"><div class='inline'>
130
- <p>Returns the value of attribute clusters.</p>
131
- </div></span>
132
-
133
- </li>
134
-
135
-
136
- <li class="public ">
137
- <span class="summary_signature">
138
-
139
- <a href="#values-instance_method" title="#values (instance method)">- (Object) <strong>values</strong> </a>
140
-
141
-
142
-
143
- </span>
144
-
145
-
146
-
147
-
148
-
149
-
150
-
151
-
152
-
153
-
154
-
155
-
156
- <span class="summary_desc"><div class='inline'>
157
- <p>Returns the value of attribute values.</p>
158
- </div></span>
159
-
160
- </li>
161
-
162
-
163
- </ul>
164
-
165
-
166
-
167
-
168
-
169
- <h2>
170
- Instance Method Summary
171
- <small>(<a href="#" class="summary_toggle">collapse</a>)</small>
172
- </h2>
173
-
174
- <ul class="summary">
175
-
176
- <li class="public ">
177
- <span class="summary_signature">
178
-
179
- <a href="#hierarchical_clusterization-instance_method" title="#hierarchical_clusterization (instance method)">- (Object) <strong>hierarchical_clusterization</strong>(no_clusters = 0, distance_method = 0, vec = @values, debug = false) </a>
180
-
181
-
182
-
183
- </span>
184
-
185
-
186
-
187
-
188
-
189
-
190
-
191
-
192
-
193
- <span class="summary_desc"><div class='inline'>
194
- <p>Makes an hierarchical clusterization until the most dense cluster is
195
- obtained or the distance between clusters is sufficintly big or the
196
- desired number of clusters is obtained Params: <tt>vec</tt>: a vector of
197
- values (by default the values from initialization) <tt>no_clusters</tt>:
198
- stop test (number of clusters) <tt>distance_method</tt>: distance method
199
- (method 0 or method 1) <tt>debug</tt>: display debug information Output:
200
- vector of <tt>Cluster</tt> objects.</p>
201
- </div></span>
202
-
203
- </li>
204
-
205
-
206
- <li class="public ">
207
- <span class="summary_signature">
208
-
209
- <a href="#hierarchical_clusterization_2d-instance_method" title="#hierarchical_clusterization_2d (instance method)">- (Object) <strong>hierarchical_clusterization_2d</strong>(no_clusters = 0, distance_method = 0, vec = @values, debug = false) </a>
210
-
211
-
212
-
213
- </span>
214
-
215
-
216
-
217
-
218
-
219
-
220
-
221
-
222
-
223
- <span class="summary_desc"><div class='inline'></div></span>
224
-
225
- </li>
226
-
227
-
228
- <li class="public ">
229
- <span class="summary_signature">
230
-
231
- <a href="#initialize-instance_method" title="#initialize (instance method)">- (HierarchicalClusterization) <strong>initialize</strong>(values) </a>
232
-
233
-
234
-
235
- </span>
236
-
237
-
238
- <span class="note title constructor">constructor</span>
239
-
240
-
241
-
242
-
243
-
244
-
245
-
246
-
247
- <span class="summary_desc"><div class='inline'>
248
- <p>Object initialization Params: <tt>values</tt> :vector of values.</p>
249
- </div></span>
250
-
251
- </li>
252
-
253
-
254
- <li class="public ">
255
- <span class="summary_signature">
256
-
257
- <a href="#most_dense_cluster-instance_method" title="#most_dense_cluster (instance method)">- (Object) <strong>most_dense_cluster</strong>(clusters = @clusters) </a>
258
-
259
-
260
-
261
- </span>
262
-
263
-
264
-
265
-
266
-
267
-
268
-
269
-
270
-
271
- <span class="summary_desc"><div class='inline'>
272
- <p>Returns the cluster with the maimum density Params: <tt>clusters</tt>: list
273
- of <tt>Clususter</tt> objects.</p>
274
- </div></span>
275
-
276
- </li>
277
-
278
-
279
- </ul>
280
-
281
-
282
- <div id="constructor_details" class="method_details_list">
283
- <h2>Constructor Details</h2>
284
-
285
- <div class="method_details first">
286
- <h3 class="signature first" id="initialize-instance_method">
287
-
288
- - (<tt><span class='object_link'><a href="" title="HierarchicalClusterization (class)">HierarchicalClusterization</a></span></tt>) <strong>initialize</strong>(values)
289
-
290
-
291
-
292
-
293
-
294
- </h3><div class="docstring">
295
- <div class="discussion">
296
-
297
- <p>Object initialization Params: <tt>values</tt> :vector of values</p>
298
-
299
-
300
- </div>
301
- </div>
302
- <div class="tags">
303
-
304
-
305
- </div><table class="source_code">
306
- <tr>
307
- <td>
308
- <pre class="lines">
309
-
310
-
311
- 382
312
- 383
313
- 384
314
- 385</pre>
315
- </td>
316
- <td>
317
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 382</span>
318
-
319
- <span class='kw'>def</span> <span class='id identifier rubyid_initialize'>initialize</span><span class='lparen'>(</span><span class='id identifier rubyid_values'>values</span><span class='rparen'>)</span>
320
- <span class='ivar'>@values</span> <span class='op'>=</span> <span class='id identifier rubyid_values'>values</span>
321
- <span class='ivar'>@clusters</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
322
- <span class='kw'>end</span></pre>
323
- </td>
324
- </tr>
325
- </table>
326
- </div>
327
-
328
- </div>
329
-
330
- <div id="instance_attr_details" class="attr_details">
331
- <h2>Instance Attribute Details</h2>
332
-
333
-
334
- <span id="clusters=-instance_method"></span>
335
- <div class="method_details first">
336
- <h3 class="signature first" id="clusters-instance_method">
337
-
338
- - (<tt>Object</tt>) <strong>clusters</strong>
339
-
340
-
341
-
342
-
343
-
344
- </h3><div class="docstring">
345
- <div class="discussion">
346
-
347
- <p>Returns the value of attribute clusters</p>
348
-
349
-
350
- </div>
351
- </div>
352
- <div class="tags">
353
-
354
-
355
- </div><table class="source_code">
356
- <tr>
357
- <td>
358
- <pre class="lines">
359
-
360
-
361
- 376
362
- 377
363
- 378</pre>
364
- </td>
365
- <td>
366
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 376</span>
367
-
368
- <span class='kw'>def</span> <span class='id identifier rubyid_clusters'>clusters</span>
369
- <span class='ivar'>@clusters</span>
370
- <span class='kw'>end</span></pre>
371
- </td>
372
- </tr>
373
- </table>
374
- </div>
375
-
376
-
377
- <span id="values=-instance_method"></span>
378
- <div class="method_details ">
379
- <h3 class="signature " id="values-instance_method">
380
-
381
- - (<tt>Object</tt>) <strong>values</strong>
382
-
383
-
384
-
385
-
386
-
387
- </h3><div class="docstring">
388
- <div class="discussion">
389
-
390
- <p>Returns the value of attribute values</p>
391
-
392
-
393
- </div>
394
- </div>
395
- <div class="tags">
396
-
397
-
398
- </div><table class="source_code">
399
- <tr>
400
- <td>
401
- <pre class="lines">
402
-
403
-
404
- 375
405
- 376
406
- 377</pre>
407
- </td>
408
- <td>
409
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 375</span>
410
-
411
- <span class='kw'>def</span> <span class='id identifier rubyid_values'>values</span>
412
- <span class='ivar'>@values</span>
413
- <span class='kw'>end</span></pre>
414
- </td>
415
- </tr>
416
- </table>
417
- </div>
418
-
419
- </div>
420
-
421
-
422
- <div id="instance_method_details" class="method_details_list">
423
- <h2>Instance Method Details</h2>
424
-
425
-
426
- <div class="method_details first">
427
- <h3 class="signature first" id="hierarchical_clusterization-instance_method">
428
-
429
- - (<tt>Object</tt>) <strong>hierarchical_clusterization</strong>(no_clusters = 0, distance_method = 0, vec = @values, debug = false)
430
-
431
-
432
-
433
-
434
-
435
- </h3><div class="docstring">
436
- <div class="discussion">
437
-
438
- <p>Makes an hierarchical clusterization until the most dense cluster is
439
- obtained or the distance between clusters is sufficintly big or the
440
- desired number of clusters is obtained Params: <tt>vec</tt>: a vector of
441
- values (by default the values from initialization) <tt>no_clusters</tt>:
442
- stop test (number of clusters) <tt>distance_method</tt>: distance method
443
- (method 0 or method 1) <tt>debug</tt>: display debug information Output:
444
- vector of <tt>Cluster</tt> objects</p>
445
-
446
-
447
- </div>
448
- </div>
449
- <div class="tags">
450
-
451
-
452
- </div><table class="source_code">
453
- <tr>
454
- <td>
455
- <pre class="lines">
456
-
457
-
458
- 503
459
- 504
460
- 505
461
- 506
462
- 507
463
- 508
464
- 509
465
- 510
466
- 511
467
- 512
468
- 513
469
- 514
470
- 515
471
- 516
472
- 517
473
- 518
474
- 519
475
- 520
476
- 521
477
- 522
478
- 523
479
- 524
480
- 525
481
- 526
482
- 527
483
- 528
484
- 529
485
- 530
486
- 531
487
- 532
488
- 533
489
- 534
490
- 535
491
- 536
492
- 537
493
- 538
494
- 539
495
- 540
496
- 541
497
- 542
498
- 543
499
- 544
500
- 545
501
- 546
502
- 547
503
- 548
504
- 549
505
- 550
506
- 551
507
- 552
508
- 553
509
- 554
510
- 555
511
- 556
512
- 557
513
- 558
514
- 559
515
- 560
516
- 561
517
- 562
518
- 563
519
- 564
520
- 565
521
- 566
522
- 567
523
- 568
524
- 569
525
- 570
526
- 571
527
- 572
528
- 573
529
- 574
530
- 575
531
- 576
532
- 577
533
- 578
534
- 579
535
- 580
536
- 581
537
- 582
538
- 583
539
- 584
540
- 585
541
- 586
542
- 587
543
- 588
544
- 589
545
- 590
546
- 591
547
- 592
548
- 593
549
- 594
550
- 595
551
- 596
552
- 597
553
- 598
554
- 599
555
- 600
556
- 601
557
- 602
558
- 603
559
- 604
560
- 605
561
- 606
562
- 607
563
- 608
564
- 609
565
- 610</pre>
566
- </td>
567
- <td>
568
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 503</span>
569
-
570
- <span class='kw'>def</span> <span class='id identifier rubyid_hierarchical_clusterization'>hierarchical_clusterization</span> <span class='lparen'>(</span><span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>=</span> <span class='int'>0</span><span class='comma'>,</span> <span class='id identifier rubyid_distance_method'>distance_method</span> <span class='op'>=</span> <span class='int'>0</span><span class='comma'>,</span> <span class='id identifier rubyid_vec'>vec</span> <span class='op'>=</span> <span class='ivar'>@values</span><span class='comma'>,</span> <span class='id identifier rubyid_debug'>debug</span> <span class='op'>=</span> <span class='kw'>false</span><span class='rparen'>)</span>
571
- <span class='id identifier rubyid_clusters'>clusters</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
572
- <span class='id identifier rubyid_vec'>vec</span> <span class='op'>=</span> <span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_sort'>sort</span>
573
-
574
- <span class='kw'>if</span> <span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='int'>1</span>
575
- <span class='id identifier rubyid_hash'>hash</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='id identifier rubyid_vec'>vec</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='op'>=&gt;</span><span class='int'>1</span><span class='rbrace'>}</span>
576
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='const'>Cluster</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_hash'>hash</span><span class='rparen'>)</span>
577
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rparen'>)</span>
578
- <span class='id identifier rubyid_clusters'>clusters</span>
579
- <span class='kw'>end</span>
580
-
581
- <span class='comment'># Thresholds
582
- </span> <span class='id identifier rubyid_threshold_distance'>threshold_distance</span> <span class='op'>=</span> <span class='lparen'>(</span><span class='float'>0.25</span> <span class='op'>*</span> <span class='lparen'>(</span><span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_max'>max</span><span class='op'>-</span><span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_min'>min</span><span class='rparen'>)</span><span class='rparen'>)</span>
583
- <span class='id identifier rubyid_threshold_density'>threshold_density</span> <span class='op'>=</span> <span class='lparen'>(</span><span class='float'>0.5</span> <span class='op'>*</span> <span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_to_i'>to_i</span>
584
-
585
- <span class='comment'># make a histogram from the input vector
586
- </span> <span class='id identifier rubyid_histogram'>histogram</span> <span class='op'>=</span> <span class='const'>Hash</span><span class='lbracket'>[</span><span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_group_by'>group_by</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_x'>x</span><span class='op'>|</span> <span class='id identifier rubyid_x'>x</span> <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_map'>map</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span> <span class='id identifier rubyid_vs'>vs</span><span class='op'>|</span> <span class='lbracket'>[</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span> <span class='id identifier rubyid_vs'>vs</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbracket'>]</span> <span class='rbrace'>}</span><span class='rbracket'>]</span>
587
-
588
- <span class='comment'># clusters = array of clusters
589
- </span> <span class='comment'>#initially each length belongs to a different cluster
590
- </span> <span class='id identifier rubyid_histogram'>histogram</span><span class='period'>.</span><span class='id identifier rubyid_sort'>sort</span> <span class='lbrace'>{</span><span class='op'>|</span><span class='id identifier rubyid_a'>a</span><span class='comma'>,</span><span class='id identifier rubyid_b'>b</span><span class='op'>|</span> <span class='id identifier rubyid_a'>a</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='op'>&lt;=&gt;</span><span class='id identifier rubyid_b'>b</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='op'>|</span>
591
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
592
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>len </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='rbrace'>}</span><span class='tstring_content'> appears </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>1</span><span class='rbracket'>]</span><span class='rbrace'>}</span><span class='tstring_content'> times</span><span class='tstring_end'>&quot;</span></span>
593
- <span class='kw'>end</span>
594
- <span class='id identifier rubyid_hash'>hash</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span> <span class='op'>=&gt;</span> <span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>1</span><span class='rbracket'>]</span><span class='rbrace'>}</span>
595
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='const'>Cluster</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_hash'>hash</span><span class='rparen'>)</span>
596
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rparen'>)</span>
597
- <span class='kw'>end</span>
598
-
599
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
600
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='op'>|</span>
601
- <span class='id identifier rubyid_elem'>elem</span><span class='period'>.</span><span class='id identifier rubyid_print'>print</span>
602
- <span class='kw'>end</span>
603
- <span class='kw'>end</span>
604
-
605
- <span class='kw'>if</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='int'>1</span>
606
- <span class='kw'>return</span> <span class='id identifier rubyid_clusters'>clusters</span>
607
- <span class='kw'>end</span>
608
-
609
- <span class='comment'># each iteration merge the closest two adiacent cluster
610
- </span> <span class='comment'># the loop stops according to the stop conditions
611
- </span> <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>=</span> <span class='int'>0</span>
612
- <span class='id identifier rubyid_loop'>loop</span> <span class='kw'>do</span>
613
-
614
- <span class='comment'>#stop condition 1
615
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>!=</span> <span class='int'>0</span> <span class='kw'>and</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span>
616
- <span class='kw'>break</span>
617
- <span class='kw'>end</span>
618
-
619
- <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>=</span> <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>+</span> <span class='int'>1</span>
620
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
621
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>\nIteration </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_iteration'>iteration</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
622
- <span class='kw'>end</span>
623
-
624
- <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='op'>=</span> <span class='int'>100000000</span>
625
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='int'>0</span>
626
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='int'>0</span>
627
-
628
- <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='int'>0</span><span class='op'>..</span><span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='int'>2</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_item'>item</span><span class='comma'>,</span> <span class='id identifier rubyid_i'>i</span><span class='op'>|</span>
629
- <span class='id identifier rubyid_dist'>dist</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_distance'>distance</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='op'>+</span><span class='int'>1</span><span class='rbracket'>]</span><span class='comma'>,</span> <span class='id identifier rubyid_distance_method'>distance_method</span><span class='rparen'>)</span>
630
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
631
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>distance between clusters </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_i'>i</span><span class='rbrace'>}</span><span class='tstring_content'> and </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_i'>i</span><span class='op'>+</span><span class='int'>1</span><span class='rbrace'>}</span><span class='tstring_content'> is </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_dist'>dist</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
632
- <span class='kw'>end</span>
633
- <span class='id identifier rubyid_current_density'>current_density</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span> <span class='op'>+</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='op'>+</span><span class='int'>1</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span>
634
- <span class='kw'>if</span> <span class='id identifier rubyid_dist'>dist</span> <span class='op'>&lt;</span> <span class='id identifier rubyid_min_distance'>min_distance</span>
635
- <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='op'>=</span> <span class='id identifier rubyid_dist'>dist</span>
636
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='id identifier rubyid_i'>i</span>
637
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='id identifier rubyid_current_density'>current_density</span>
638
- <span class='kw'>else</span>
639
- <span class='kw'>if</span> <span class='id identifier rubyid_dist'>dist</span> <span class='op'>==</span> <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='kw'>and</span> <span class='id identifier rubyid_density'>density</span> <span class='op'>&lt;</span> <span class='id identifier rubyid_current_density'>current_density</span>
640
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='id identifier rubyid_i'>i</span>
641
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='id identifier rubyid_current_density'>current_density</span>
642
- <span class='kw'>end</span>
643
- <span class='kw'>end</span>
644
- <span class='kw'>end</span>
645
-
646
-
647
- <span class='comment'>#stop condition 2
648
- </span> <span class='comment'>#the distance between the closest clusters exceeds the threshold
649
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>==</span> <span class='int'>0</span> <span class='kw'>and</span> <span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_mean'>mean</span> <span class='op'>-</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='op'>+</span><span class='int'>1</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_mean'>mean</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_abs'>abs</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_threshold_distance'>threshold_distance</span>
650
- <span class='kw'>break</span>
651
- <span class='kw'>end</span>
652
-
653
- <span class='comment'>#merge clusters 'cluster' and 'cluster'+1
654
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
655
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>clusters to merge </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rbrace'>}</span><span class='tstring_content'> and </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_cluster'>cluster</span><span class='op'>+</span><span class='int'>1</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
656
- <span class='kw'>end</span>
657
-
658
- <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_add'>add</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='op'>+</span><span class='int'>1</span><span class='rbracket'>]</span><span class='rparen'>)</span>
659
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_delete_at'>delete_at</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster'>cluster</span><span class='op'>+</span><span class='int'>1</span><span class='rparen'>)</span>
660
-
661
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
662
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='comma'>,</span> <span class='id identifier rubyid_i'>i</span><span class='op'>|</span>
663
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>cluster </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_i'>i</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
664
- <span class='id identifier rubyid_elem'>elem</span><span class='period'>.</span><span class='id identifier rubyid_print'>print</span>
665
- <span class='kw'>end</span>
666
- <span class='kw'>end</span>
667
-
668
- <span class='comment'>#stop condition 3
669
- </span> <span class='comment'>#the density of the biggest clusters exceeds the threshold
670
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>==</span> <span class='int'>0</span> <span class='kw'>and</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_threshold_density'>threshold_density</span>
671
- <span class='kw'>break</span>
672
- <span class='kw'>end</span>
673
- <span class='kw'>end</span>
674
-
675
- <span class='ivar'>@clusters</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span>
676
- <span class='id identifier rubyid_clusters'>clusters</span>
677
- <span class='kw'>end</span></pre>
678
- </td>
679
- </tr>
680
- </table>
681
- </div>
682
-
683
- <div class="method_details ">
684
- <h3 class="signature " id="hierarchical_clusterization_2d-instance_method">
685
-
686
- - (<tt>Object</tt>) <strong>hierarchical_clusterization_2d</strong>(no_clusters = 0, distance_method = 0, vec = @values, debug = false)
687
-
688
-
689
-
690
-
691
-
692
- </h3><table class="source_code">
693
- <tr>
694
- <td>
695
- <pre class="lines">
696
-
697
-
698
- 387
699
- 388
700
- 389
701
- 390
702
- 391
703
- 392
704
- 393
705
- 394
706
- 395
707
- 396
708
- 397
709
- 398
710
- 399
711
- 400
712
- 401
713
- 402
714
- 403
715
- 404
716
- 405
717
- 406
718
- 407
719
- 408
720
- 409
721
- 410
722
- 411
723
- 412
724
- 413
725
- 414
726
- 415
727
- 416
728
- 417
729
- 418
730
- 419
731
- 420
732
- 421
733
- 422
734
- 423
735
- 424
736
- 425
737
- 426
738
- 427
739
- 428
740
- 429
741
- 430
742
- 431
743
- 432
744
- 433
745
- 434
746
- 435
747
- 436
748
- 437
749
- 438
750
- 439
751
- 440
752
- 441
753
- 442
754
- 443
755
- 444
756
- 445
757
- 446
758
- 447
759
- 448
760
- 449
761
- 450
762
- 451
763
- 452
764
- 453
765
- 454
766
- 455
767
- 456
768
- 457
769
- 458
770
- 459
771
- 460
772
- 461
773
- 462
774
- 463
775
- 464
776
- 465
777
- 466
778
- 467
779
- 468
780
- 469
781
- 470
782
- 471
783
- 472
784
- 473
785
- 474
786
- 475
787
- 476
788
- 477
789
- 478
790
- 479
791
- 480
792
- 481
793
- 482
794
- 483
795
- 484
796
- 485
797
- 486
798
- 487
799
- 488
800
- 489
801
- 490</pre>
802
- </td>
803
- <td>
804
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 387</span>
805
-
806
- <span class='kw'>def</span> <span class='id identifier rubyid_hierarchical_clusterization_2d'>hierarchical_clusterization_2d</span> <span class='lparen'>(</span><span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>=</span> <span class='int'>0</span><span class='comma'>,</span> <span class='id identifier rubyid_distance_method'>distance_method</span> <span class='op'>=</span> <span class='int'>0</span><span class='comma'>,</span> <span class='id identifier rubyid_vec'>vec</span> <span class='op'>=</span> <span class='ivar'>@values</span><span class='comma'>,</span> <span class='id identifier rubyid_debug'>debug</span> <span class='op'>=</span> <span class='kw'>false</span><span class='rparen'>)</span>
807
- <span class='id identifier rubyid_clusters'>clusters</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
808
-
809
- <span class='kw'>if</span> <span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='int'>1</span>
810
- <span class='id identifier rubyid_hash'>hash</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='id identifier rubyid_vec'>vec</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='op'>=&gt;</span><span class='int'>1</span><span class='rbrace'>}</span>
811
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='const'>PairCluster</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_hash'>hash</span><span class='rparen'>)</span>
812
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rparen'>)</span>
813
- <span class='id identifier rubyid_clusters'>clusters</span>
814
- <span class='kw'>end</span>
815
-
816
- <span class='comment'># Thresholds
817
- </span> <span class='comment'># threshold_distance = (0.25 * (vec.max-vec.min))
818
- </span> <span class='id identifier rubyid_threshold_density'>threshold_density</span> <span class='op'>=</span> <span class='lparen'>(</span><span class='float'>0.5</span> <span class='op'>*</span> <span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_to_i'>to_i</span>
819
-
820
- <span class='comment'># make a histogram from the input vector
821
- </span> <span class='id identifier rubyid_histogram'>histogram</span> <span class='op'>=</span> <span class='const'>Hash</span><span class='lbracket'>[</span><span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_group_by'>group_by</span><span class='lbrace'>{</span><span class='op'>|</span><span class='id identifier rubyid_a'>a</span><span class='op'>|</span> <span class='id identifier rubyid_a'>a</span><span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_map'>map</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span> <span class='id identifier rubyid_vs'>vs</span><span class='op'>|</span> <span class='lbracket'>[</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span> <span class='id identifier rubyid_vs'>vs</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbracket'>]</span> <span class='rbrace'>}</span><span class='rbracket'>]</span>
822
-
823
- <span class='comment'># clusters = array of clusters
824
- </span> <span class='comment'># initially each length belongs to a different cluster
825
- </span> <span class='id identifier rubyid_histogram'>histogram</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='op'>|</span>
826
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
827
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>pair (</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_x'>x</span><span class='rbrace'>}</span><span class='tstring_content'> </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_y'>y</span><span class='rbrace'>}</span><span class='tstring_content'>) appears </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>1</span><span class='rbracket'>]</span><span class='rbrace'>}</span><span class='tstring_content'> times</span><span class='tstring_end'>&quot;</span></span>
828
- <span class='kw'>end</span>
829
- <span class='id identifier rubyid_hash'>hash</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span> <span class='op'>=&gt;</span> <span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>1</span><span class='rbracket'>]</span><span class='rbrace'>}</span>
830
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='const'>PairCluster</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_hash'>hash</span><span class='rparen'>)</span>
831
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rparen'>)</span>
832
- <span class='kw'>end</span>
833
-
834
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
835
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='op'>|</span>
836
- <span class='id identifier rubyid_elem'>elem</span><span class='period'>.</span><span class='id identifier rubyid_print'>print</span>
837
- <span class='kw'>end</span>
838
- <span class='kw'>end</span>
839
-
840
- <span class='kw'>if</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='int'>1</span>
841
- <span class='kw'>return</span> <span class='id identifier rubyid_clusters'>clusters</span>
842
- <span class='kw'>end</span>
843
-
844
- <span class='comment'># each iteration merge the closest two adiacent cluster
845
- </span> <span class='comment'># the loop stops according to the stop conditions
846
- </span> <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>=</span> <span class='int'>0</span>
847
- <span class='id identifier rubyid_loop'>loop</span> <span class='kw'>do</span>
848
- <span class='comment'>#stop condition 1
849
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>!=</span> <span class='int'>0</span> <span class='kw'>and</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span>
850
- <span class='kw'>break</span>
851
- <span class='kw'>end</span>
852
-
853
- <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>=</span> <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>+</span> <span class='int'>1</span>
854
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
855
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>\nIteration </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_iteration'>iteration</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
856
- <span class='kw'>end</span>
857
-
858
- <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='op'>=</span> <span class='int'>100000000</span>
859
- <span class='id identifier rubyid_cluster1'>cluster1</span> <span class='op'>=</span> <span class='int'>0</span>
860
- <span class='id identifier rubyid_cluster2'>cluster2</span> <span class='op'>=</span> <span class='int'>0</span>
861
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='int'>0</span>
862
-
863
- <span class='lbracket'>[</span><span class='op'>*</span><span class='lparen'>(</span><span class='int'>0</span><span class='op'>..</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='int'>2</span><span class='rparen'>)</span><span class='rparen'>)</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='op'>|</span>
864
- <span class='lbracket'>[</span><span class='op'>*</span><span class='lparen'>(</span><span class='lparen'>(</span><span class='id identifier rubyid_i'>i</span><span class='op'>+</span><span class='int'>1</span><span class='rparen'>)</span><span class='op'>..</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='int'>1</span><span class='rparen'>)</span><span class='rparen'>)</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_j'>j</span><span class='op'>|</span>
865
- <span class='id identifier rubyid_dist'>dist</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_distance'>distance</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_j'>j</span><span class='rbracket'>]</span><span class='comma'>,</span> <span class='id identifier rubyid_distance_method'>distance_method</span><span class='rparen'>)</span>
866
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
867
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>distance between clusters </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_i'>i</span><span class='rbrace'>}</span><span class='tstring_content'> and </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_j'>j</span><span class='rbrace'>}</span><span class='tstring_content'> is </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_dist'>dist</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
868
- <span class='kw'>end</span>
869
- <span class='id identifier rubyid_current_density'>current_density</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span> <span class='op'>+</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_j'>j</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span>
870
- <span class='kw'>if</span> <span class='id identifier rubyid_dist'>dist</span> <span class='op'>&lt;</span> <span class='id identifier rubyid_min_distance'>min_distance</span>
871
- <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='op'>=</span> <span class='id identifier rubyid_dist'>dist</span>
872
- <span class='id identifier rubyid_cluster1'>cluster1</span> <span class='op'>=</span> <span class='id identifier rubyid_i'>i</span>
873
- <span class='id identifier rubyid_cluster2'>cluster2</span> <span class='op'>=</span> <span class='id identifier rubyid_j'>j</span>
874
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='id identifier rubyid_current_density'>current_density</span>
875
- <span class='kw'>else</span>
876
- <span class='kw'>if</span> <span class='id identifier rubyid_dist'>dist</span> <span class='op'>==</span> <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='kw'>and</span> <span class='id identifier rubyid_density'>density</span> <span class='op'>&lt;</span> <span class='id identifier rubyid_current_density'>current_density</span>
877
- <span class='id identifier rubyid_cluster1'>cluster1</span> <span class='op'>=</span> <span class='id identifier rubyid_i'>i</span>
878
- <span class='id identifier rubyid_cluster2'>cluster2</span> <span class='op'>=</span> <span class='id identifier rubyid_j'>j</span>
879
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='id identifier rubyid_current_density'>current_density</span>
880
- <span class='kw'>end</span>
881
- <span class='kw'>end</span>
882
- <span class='kw'>end</span>
883
- <span class='kw'>end</span>
884
-
885
- <span class='comment'># merge clusters 'cluster1' and 'cluster2'
886
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
887
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>clusters to merge </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_cluster1'>cluster1</span><span class='rbrace'>}</span><span class='tstring_content'> and </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_cluster2'>cluster2</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
888
- <span class='kw'>end</span>
889
- <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster1'>cluster1</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_add'>add</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster2'>cluster2</span><span class='rbracket'>]</span><span class='rparen'>)</span>
890
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_delete_at'>delete_at</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster2'>cluster2</span><span class='rparen'>)</span>
891
-
892
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
893
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='comma'>,</span> <span class='id identifier rubyid_i'>i</span><span class='op'>|</span>
894
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>cluster </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_i'>i</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
895
- <span class='id identifier rubyid_elem'>elem</span><span class='period'>.</span><span class='id identifier rubyid_print'>print</span>
896
- <span class='kw'>end</span>
897
- <span class='kw'>end</span>
898
-
899
- <span class='comment'>#stop condition 3
900
- </span> <span class='comment'>#the density of the biggest clusters exceeds the threshold
901
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>==</span> <span class='int'>0</span> <span class='kw'>and</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_threshold_density'>threshold_density</span>
902
- <span class='kw'>break</span>
903
- <span class='kw'>end</span>
904
- <span class='kw'>end</span>
905
-
906
- <span class='ivar'>@clusters</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span>
907
- <span class='id identifier rubyid_clusters'>clusters</span>
908
-
909
- <span class='kw'>end</span></pre>
910
- </td>
911
- </tr>
912
- </table>
913
- </div>
914
-
915
- <div class="method_details ">
916
- <h3 class="signature " id="most_dense_cluster-instance_method">
917
-
918
- - (<tt>Object</tt>) <strong>most_dense_cluster</strong>(clusters = @clusters)
919
-
920
-
921
-
922
-
923
-
924
- </h3><div class="docstring">
925
- <div class="discussion">
926
-
927
- <p>Returns the cluster with the maimum density Params: <tt>clusters</tt>: list
928
- of <tt>Clususter</tt> objects</p>
929
-
930
-
931
- </div>
932
- </div>
933
- <div class="tags">
934
-
935
-
936
- </div><table class="source_code">
937
- <tr>
938
- <td>
939
- <pre class="lines">
940
-
941
-
942
- 616
943
- 617
944
- 618
945
- 619
946
- 620
947
- 621
948
- 622
949
- 623
950
- 624
951
- 625
952
- 626
953
- 627
954
- 628
955
- 629
956
- 630
957
- 631</pre>
958
- </td>
959
- <td>
960
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 616</span>
961
-
962
- <span class='kw'>def</span> <span class='id identifier rubyid_most_dense_cluster'>most_dense_cluster</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span> <span class='op'>=</span> <span class='ivar'>@clusters</span><span class='rparen'>)</span>
963
- <span class='id identifier rubyid_max_density'>max_density</span> <span class='op'>=</span> <span class='int'>0</span><span class='semicolon'>;</span>
964
- <span class='id identifier rubyid_max_density_cluster'>max_density_cluster</span> <span class='op'>=</span> <span class='int'>0</span><span class='semicolon'>;</span>
965
-
966
- <span class='kw'>if</span> <span class='id identifier rubyid_clusters'>clusters</span> <span class='op'>==</span> <span class='kw'>nil</span>
967
- <span class='kw'>nil</span>
968
- <span class='kw'>end</span>
969
-
970
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_item'>item</span><span class='comma'>,</span> <span class='id identifier rubyid_i'>i</span><span class='op'>|</span>
971
- <span class='kw'>if</span> <span class='id identifier rubyid_item'>item</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_max_density'>max_density</span>
972
- <span class='id identifier rubyid_max_density'>max_density</span> <span class='op'>=</span> <span class='id identifier rubyid_item'>item</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span>
973
- <span class='id identifier rubyid_max_density_cluster'>max_density_cluster</span> <span class='op'>=</span> <span class='id identifier rubyid_i'>i</span><span class='semicolon'>;</span>
974
- <span class='kw'>end</span>
975
- <span class='kw'>end</span>
976
- <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_max_density_cluster'>max_density_cluster</span><span class='rbracket'>]</span>
977
- <span class='kw'>end</span></pre>
978
- </td>
979
- </tr>
980
- </table>
981
- </div>
982
-
983
- </div>
984
-
985
- </div>
986
-
987
- <div id="footer">
988
- Generated on Sat Sep 28 07:01:33 2013 by
989
- <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
990
- 0.8.7.2 (ruby-1.9.3).
991
- </div>
992
-
993
- </body>
994
- </html>