genevalidator 1.6.1 → 1.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -1
  3. data/.travis.yml +2 -0
  4. data/README.md +78 -30
  5. data/Rakefile +11 -8
  6. data/aux/app_template_footer.erb +1 -6
  7. data/aux/app_template_header.erb +12 -32
  8. data/aux/files/css/style.css +2 -8
  9. data/aux/files/js/plots.js +564 -576
  10. data/aux/files/js/script.js +10 -0
  11. data/aux/json_footer.erb +8 -0
  12. data/aux/json_header.erb +19 -0
  13. data/aux/json_query.erb +14 -0
  14. data/aux/template_footer.erb +9 -58
  15. data/aux/template_header.erb +18 -58
  16. data/aux/template_query.erb +8 -36
  17. data/bin/genevalidator +45 -32
  18. data/genevalidator.gemspec +11 -7
  19. data/lib/genevalidator.rb +75 -455
  20. data/lib/genevalidator/arg_validation.rb +78 -107
  21. data/lib/genevalidator/blast.rb +57 -60
  22. data/lib/genevalidator/clusterization.rb +15 -15
  23. data/lib/genevalidator/exceptions.rb +32 -5
  24. data/lib/genevalidator/get_raw_sequences.rb +70 -33
  25. data/lib/genevalidator/hsp.rb +1 -4
  26. data/lib/genevalidator/json_to_gv_results.rb +109 -0
  27. data/lib/genevalidator/output.rb +177 -185
  28. data/lib/genevalidator/pool.rb +2 -1
  29. data/lib/genevalidator/sequences.rb +3 -3
  30. data/lib/genevalidator/tabular_parser.rb +24 -18
  31. data/lib/genevalidator/validation.rb +279 -0
  32. data/lib/genevalidator/validation_alignment.rb +31 -47
  33. data/lib/genevalidator/validation_blast_reading_frame.rb +19 -18
  34. data/lib/genevalidator/validation_duplication.rb +23 -19
  35. data/lib/genevalidator/validation_gene_merge.rb +30 -65
  36. data/lib/genevalidator/validation_length_cluster.rb +14 -53
  37. data/lib/genevalidator/validation_length_rank.rb +10 -11
  38. data/lib/genevalidator/validation_open_reading_frame.rb +18 -19
  39. data/lib/genevalidator/validation_report.rb +2 -5
  40. data/lib/genevalidator/validation_test.rb +8 -4
  41. data/lib/genevalidator/version.rb +1 -1
  42. data/test/test_all_validations.rb +51 -66
  43. data/test/test_blast.rb +68 -51
  44. data/test/test_clusterization.rb +1 -1
  45. data/test/test_clusterization_2d.rb +19 -13
  46. data/test/test_extended_array_methods.rb +1 -1
  47. data/test/test_files/all_validations_mrna/mrna.blast_tab6 +1806 -0
  48. data/test/test_files/all_validations_mrna/mrna.blast_tab7 +1865 -0
  49. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml → mrna.blast_xml} +18642 -1
  50. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.index → mrna.blast_xml.index} +300 -0
  51. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta → mrna.fa} +0 -0
  52. data/test/test_files/all_validations_mrna/mrna.raw_seq +3970 -0
  53. data/test/test_files/all_validations_mrna/{all_validations_mrna.fasta.blast_xml.raw_seq.idx → mrna.raw_seq.idx} +901 -1
  54. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_tab → prot.blast_tab6} +416 -0
  55. data/test/test_files/all_validations_prot/prot.blast_tab7 +2400 -0
  56. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml → prot.blast_xml} +18299 -6723
  57. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.index → prot.blast_xml.index} +408 -0
  58. data/test/test_files/all_validations_prot/{all_validations_prot.fasta → prot.fa} +0 -0
  59. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq → prot.raw_seq} +2735 -0
  60. data/test/test_files/all_validations_prot/{all_validations_prot.fasta.blast_xml.raw_seq.idx → prot.raw_seq.idx} +3032 -1808
  61. data/test/test_sequences.rb +46 -41
  62. data/test/test_validation_open_reading_frame.rb +318 -202
  63. data/test/test_validations.rb +48 -32
  64. metadata +76 -102
  65. data/doc/AliasDuplicationError.html +0 -134
  66. data/doc/AlignmentValidation.html +0 -1687
  67. data/doc/AlignmentValidationOutput.html +0 -659
  68. data/doc/Blast.html +0 -1905
  69. data/doc/BlastRFValidationOutput.html +0 -545
  70. data/doc/BlastReadingFrameValidation.html +0 -370
  71. data/doc/BlastUtils.html +0 -875
  72. data/doc/ClasspathError.html +0 -134
  73. data/doc/Cluster.html +0 -1316
  74. data/doc/DuplciationValidationOutput.html +0 -564
  75. data/doc/DuplicationValidation.html +0 -920
  76. data/doc/DuplicationValidationOutput.html +0 -564
  77. data/doc/FileNotFoundException.html +0 -134
  78. data/doc/GeneMergeValidation.html +0 -935
  79. data/doc/GeneMergeValidationOutput.html +0 -652
  80. data/doc/HierarchicalClusterization.html +0 -994
  81. data/doc/Hsp.html +0 -1485
  82. data/doc/InconsistentTabularFormat.html +0 -135
  83. data/doc/LengthClusterValidation.html +0 -982
  84. data/doc/LengthClusterValidationOutput.html +0 -515
  85. data/doc/LengthRankValidation.html +0 -496
  86. data/doc/LengthRankValidationOutput.html +0 -517
  87. data/doc/NoInternetError.html +0 -135
  88. data/doc/NoMafftInstallationError.html +0 -134
  89. data/doc/NoPIdentError.html +0 -134
  90. data/doc/NoValidationError.html +0 -134
  91. data/doc/NotEnoughHitsError.html +0 -135
  92. data/doc/ORFValidationOutput.html +0 -593
  93. data/doc/OpenReadingFrameValidation.html +0 -1107
  94. data/doc/OtherError.html +0 -123
  95. data/doc/Output.html +0 -1540
  96. data/doc/Pair.html +0 -309
  97. data/doc/PairCluster.html +0 -767
  98. data/doc/Plot.html +0 -837
  99. data/doc/QueryError.html +0 -134
  100. data/doc/ReportClassError.html +0 -135
  101. data/doc/Sequence.html +0 -1299
  102. data/doc/SequenceTypeError.html +0 -135
  103. data/doc/TabularEntry.html +0 -837
  104. data/doc/TabularParser.html +0 -1104
  105. data/doc/Validation.html +0 -2147
  106. data/doc/ValidationClassError.html +0 -134
  107. data/doc/ValidationOutput.html +0 -460
  108. data/doc/ValidationReport.html +0 -940
  109. data/doc/ValidationTest.html +0 -939
  110. data/doc/_index.html +0 -449
  111. data/doc/class_list.html +0 -54
  112. data/doc/css/common.css +0 -1
  113. data/doc/css/full_list.css +0 -57
  114. data/doc/css/style.css +0 -338
  115. data/doc/file.README.html +0 -151
  116. data/doc/file_list.html +0 -56
  117. data/doc/frames.html +0 -26
  118. data/doc/index.html +0 -151
  119. data/doc/js/app.js +0 -214
  120. data/doc/js/full_list.js +0 -178
  121. data/doc/js/jquery.js +0 -4
  122. data/doc/method_list.html +0 -1505
  123. data/doc/top-level-namespace.html +0 -112
  124. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab +0 -967
  125. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.index +0 -967
  126. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq +0 -4929
  127. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_tab.raw_seq.idx +0 -1006
  128. data/test/test_files/all_validations_mrna/all_validations_mrna.fasta.blast_xml.raw_seq +0 -2075
  129. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.index +0 -1864
  130. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq +0 -42411
  131. data/test/test_files/all_validations_prot/all_validations_prot.fasta.blast_tab.raw_seq.idx +0 -3751
@@ -1,994 +0,0 @@
1
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
- <head>
5
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
6
- <title>
7
- Class: HierarchicalClusterization
8
-
9
- &mdash; Documentation by YARD 0.8.7.2
10
-
11
- </title>
12
-
13
- <link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
14
-
15
- <link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
16
-
17
- <script type="text/javascript" charset="utf-8">
18
- hasFrames = window.top.frames.main ? true : false;
19
- relpath = '';
20
- framesUrl = "frames.html#!" + escape(window.location.href);
21
- </script>
22
-
23
-
24
- <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
25
-
26
- <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
27
-
28
-
29
- </head>
30
- <body>
31
- <div id="header">
32
- <div id="menu">
33
-
34
- <a href="_index.html">Index (H)</a> &raquo;
35
-
36
-
37
- <span class="title">HierarchicalClusterization</span>
38
-
39
-
40
- <div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
41
- </div>
42
-
43
- <div id="search">
44
-
45
- <a class="full_list_link" id="class_list_link"
46
- href="class_list.html">
47
- Class List
48
- </a>
49
-
50
- <a class="full_list_link" id="method_list_link"
51
- href="method_list.html">
52
- Method List
53
- </a>
54
-
55
- <a class="full_list_link" id="file_list_link"
56
- href="file_list.html">
57
- File List
58
- </a>
59
-
60
- </div>
61
- <div class="clear"></div>
62
- </div>
63
-
64
- <iframe id="search_frame"></iframe>
65
-
66
- <div id="content"><h1>Class: HierarchicalClusterization
67
-
68
-
69
-
70
- </h1>
71
-
72
- <dl class="box">
73
-
74
- <dt class="r1">Inherits:</dt>
75
- <dd class="r1">
76
- <span class="inheritName">Object</span>
77
-
78
- <ul class="fullTree">
79
- <li>Object</li>
80
-
81
- <li class="next">HierarchicalClusterization</li>
82
-
83
- </ul>
84
- <a href="#" class="inheritanceTree">show all</a>
85
-
86
- </dd>
87
-
88
-
89
-
90
-
91
-
92
-
93
-
94
-
95
-
96
- <dt class="r2 last">Defined in:</dt>
97
- <dd class="r2 last">lib/genevalidator/clusterization.rb</dd>
98
-
99
- </dl>
100
- <div class="clear"></div>
101
-
102
-
103
-
104
-
105
-
106
- <h2>Instance Attribute Summary <small>(<a href="#" class="summary_toggle">collapse</a>)</small></h2>
107
- <ul class="summary">
108
-
109
- <li class="public ">
110
- <span class="summary_signature">
111
-
112
- <a href="#clusters-instance_method" title="#clusters (instance method)">- (Object) <strong>clusters</strong> </a>
113
-
114
-
115
-
116
- </span>
117
-
118
-
119
-
120
-
121
-
122
-
123
-
124
-
125
-
126
-
127
-
128
-
129
- <span class="summary_desc"><div class='inline'>
130
- <p>Returns the value of attribute clusters.</p>
131
- </div></span>
132
-
133
- </li>
134
-
135
-
136
- <li class="public ">
137
- <span class="summary_signature">
138
-
139
- <a href="#values-instance_method" title="#values (instance method)">- (Object) <strong>values</strong> </a>
140
-
141
-
142
-
143
- </span>
144
-
145
-
146
-
147
-
148
-
149
-
150
-
151
-
152
-
153
-
154
-
155
-
156
- <span class="summary_desc"><div class='inline'>
157
- <p>Returns the value of attribute values.</p>
158
- </div></span>
159
-
160
- </li>
161
-
162
-
163
- </ul>
164
-
165
-
166
-
167
-
168
-
169
- <h2>
170
- Instance Method Summary
171
- <small>(<a href="#" class="summary_toggle">collapse</a>)</small>
172
- </h2>
173
-
174
- <ul class="summary">
175
-
176
- <li class="public ">
177
- <span class="summary_signature">
178
-
179
- <a href="#hierarchical_clusterization-instance_method" title="#hierarchical_clusterization (instance method)">- (Object) <strong>hierarchical_clusterization</strong>(no_clusters = 0, distance_method = 0, vec = @values, debug = false) </a>
180
-
181
-
182
-
183
- </span>
184
-
185
-
186
-
187
-
188
-
189
-
190
-
191
-
192
-
193
- <span class="summary_desc"><div class='inline'>
194
- <p>Makes an hierarchical clusterization until the most dense cluster is
195
- obtained or the distance between clusters is sufficintly big or the
196
- desired number of clusters is obtained Params: <tt>vec</tt>: a vector of
197
- values (by default the values from initialization) <tt>no_clusters</tt>:
198
- stop test (number of clusters) <tt>distance_method</tt>: distance method
199
- (method 0 or method 1) <tt>debug</tt>: display debug information Output:
200
- vector of <tt>Cluster</tt> objects.</p>
201
- </div></span>
202
-
203
- </li>
204
-
205
-
206
- <li class="public ">
207
- <span class="summary_signature">
208
-
209
- <a href="#hierarchical_clusterization_2d-instance_method" title="#hierarchical_clusterization_2d (instance method)">- (Object) <strong>hierarchical_clusterization_2d</strong>(no_clusters = 0, distance_method = 0, vec = @values, debug = false) </a>
210
-
211
-
212
-
213
- </span>
214
-
215
-
216
-
217
-
218
-
219
-
220
-
221
-
222
-
223
- <span class="summary_desc"><div class='inline'></div></span>
224
-
225
- </li>
226
-
227
-
228
- <li class="public ">
229
- <span class="summary_signature">
230
-
231
- <a href="#initialize-instance_method" title="#initialize (instance method)">- (HierarchicalClusterization) <strong>initialize</strong>(values) </a>
232
-
233
-
234
-
235
- </span>
236
-
237
-
238
- <span class="note title constructor">constructor</span>
239
-
240
-
241
-
242
-
243
-
244
-
245
-
246
-
247
- <span class="summary_desc"><div class='inline'>
248
- <p>Object initialization Params: <tt>values</tt> :vector of values.</p>
249
- </div></span>
250
-
251
- </li>
252
-
253
-
254
- <li class="public ">
255
- <span class="summary_signature">
256
-
257
- <a href="#most_dense_cluster-instance_method" title="#most_dense_cluster (instance method)">- (Object) <strong>most_dense_cluster</strong>(clusters = @clusters) </a>
258
-
259
-
260
-
261
- </span>
262
-
263
-
264
-
265
-
266
-
267
-
268
-
269
-
270
-
271
- <span class="summary_desc"><div class='inline'>
272
- <p>Returns the cluster with the maimum density Params: <tt>clusters</tt>: list
273
- of <tt>Clususter</tt> objects.</p>
274
- </div></span>
275
-
276
- </li>
277
-
278
-
279
- </ul>
280
-
281
-
282
- <div id="constructor_details" class="method_details_list">
283
- <h2>Constructor Details</h2>
284
-
285
- <div class="method_details first">
286
- <h3 class="signature first" id="initialize-instance_method">
287
-
288
- - (<tt><span class='object_link'><a href="" title="HierarchicalClusterization (class)">HierarchicalClusterization</a></span></tt>) <strong>initialize</strong>(values)
289
-
290
-
291
-
292
-
293
-
294
- </h3><div class="docstring">
295
- <div class="discussion">
296
-
297
- <p>Object initialization Params: <tt>values</tt> :vector of values</p>
298
-
299
-
300
- </div>
301
- </div>
302
- <div class="tags">
303
-
304
-
305
- </div><table class="source_code">
306
- <tr>
307
- <td>
308
- <pre class="lines">
309
-
310
-
311
- 382
312
- 383
313
- 384
314
- 385</pre>
315
- </td>
316
- <td>
317
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 382</span>
318
-
319
- <span class='kw'>def</span> <span class='id identifier rubyid_initialize'>initialize</span><span class='lparen'>(</span><span class='id identifier rubyid_values'>values</span><span class='rparen'>)</span>
320
- <span class='ivar'>@values</span> <span class='op'>=</span> <span class='id identifier rubyid_values'>values</span>
321
- <span class='ivar'>@clusters</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
322
- <span class='kw'>end</span></pre>
323
- </td>
324
- </tr>
325
- </table>
326
- </div>
327
-
328
- </div>
329
-
330
- <div id="instance_attr_details" class="attr_details">
331
- <h2>Instance Attribute Details</h2>
332
-
333
-
334
- <span id="clusters=-instance_method"></span>
335
- <div class="method_details first">
336
- <h3 class="signature first" id="clusters-instance_method">
337
-
338
- - (<tt>Object</tt>) <strong>clusters</strong>
339
-
340
-
341
-
342
-
343
-
344
- </h3><div class="docstring">
345
- <div class="discussion">
346
-
347
- <p>Returns the value of attribute clusters</p>
348
-
349
-
350
- </div>
351
- </div>
352
- <div class="tags">
353
-
354
-
355
- </div><table class="source_code">
356
- <tr>
357
- <td>
358
- <pre class="lines">
359
-
360
-
361
- 376
362
- 377
363
- 378</pre>
364
- </td>
365
- <td>
366
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 376</span>
367
-
368
- <span class='kw'>def</span> <span class='id identifier rubyid_clusters'>clusters</span>
369
- <span class='ivar'>@clusters</span>
370
- <span class='kw'>end</span></pre>
371
- </td>
372
- </tr>
373
- </table>
374
- </div>
375
-
376
-
377
- <span id="values=-instance_method"></span>
378
- <div class="method_details ">
379
- <h3 class="signature " id="values-instance_method">
380
-
381
- - (<tt>Object</tt>) <strong>values</strong>
382
-
383
-
384
-
385
-
386
-
387
- </h3><div class="docstring">
388
- <div class="discussion">
389
-
390
- <p>Returns the value of attribute values</p>
391
-
392
-
393
- </div>
394
- </div>
395
- <div class="tags">
396
-
397
-
398
- </div><table class="source_code">
399
- <tr>
400
- <td>
401
- <pre class="lines">
402
-
403
-
404
- 375
405
- 376
406
- 377</pre>
407
- </td>
408
- <td>
409
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 375</span>
410
-
411
- <span class='kw'>def</span> <span class='id identifier rubyid_values'>values</span>
412
- <span class='ivar'>@values</span>
413
- <span class='kw'>end</span></pre>
414
- </td>
415
- </tr>
416
- </table>
417
- </div>
418
-
419
- </div>
420
-
421
-
422
- <div id="instance_method_details" class="method_details_list">
423
- <h2>Instance Method Details</h2>
424
-
425
-
426
- <div class="method_details first">
427
- <h3 class="signature first" id="hierarchical_clusterization-instance_method">
428
-
429
- - (<tt>Object</tt>) <strong>hierarchical_clusterization</strong>(no_clusters = 0, distance_method = 0, vec = @values, debug = false)
430
-
431
-
432
-
433
-
434
-
435
- </h3><div class="docstring">
436
- <div class="discussion">
437
-
438
- <p>Makes an hierarchical clusterization until the most dense cluster is
439
- obtained or the distance between clusters is sufficintly big or the
440
- desired number of clusters is obtained Params: <tt>vec</tt>: a vector of
441
- values (by default the values from initialization) <tt>no_clusters</tt>:
442
- stop test (number of clusters) <tt>distance_method</tt>: distance method
443
- (method 0 or method 1) <tt>debug</tt>: display debug information Output:
444
- vector of <tt>Cluster</tt> objects</p>
445
-
446
-
447
- </div>
448
- </div>
449
- <div class="tags">
450
-
451
-
452
- </div><table class="source_code">
453
- <tr>
454
- <td>
455
- <pre class="lines">
456
-
457
-
458
- 503
459
- 504
460
- 505
461
- 506
462
- 507
463
- 508
464
- 509
465
- 510
466
- 511
467
- 512
468
- 513
469
- 514
470
- 515
471
- 516
472
- 517
473
- 518
474
- 519
475
- 520
476
- 521
477
- 522
478
- 523
479
- 524
480
- 525
481
- 526
482
- 527
483
- 528
484
- 529
485
- 530
486
- 531
487
- 532
488
- 533
489
- 534
490
- 535
491
- 536
492
- 537
493
- 538
494
- 539
495
- 540
496
- 541
497
- 542
498
- 543
499
- 544
500
- 545
501
- 546
502
- 547
503
- 548
504
- 549
505
- 550
506
- 551
507
- 552
508
- 553
509
- 554
510
- 555
511
- 556
512
- 557
513
- 558
514
- 559
515
- 560
516
- 561
517
- 562
518
- 563
519
- 564
520
- 565
521
- 566
522
- 567
523
- 568
524
- 569
525
- 570
526
- 571
527
- 572
528
- 573
529
- 574
530
- 575
531
- 576
532
- 577
533
- 578
534
- 579
535
- 580
536
- 581
537
- 582
538
- 583
539
- 584
540
- 585
541
- 586
542
- 587
543
- 588
544
- 589
545
- 590
546
- 591
547
- 592
548
- 593
549
- 594
550
- 595
551
- 596
552
- 597
553
- 598
554
- 599
555
- 600
556
- 601
557
- 602
558
- 603
559
- 604
560
- 605
561
- 606
562
- 607
563
- 608
564
- 609
565
- 610</pre>
566
- </td>
567
- <td>
568
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 503</span>
569
-
570
- <span class='kw'>def</span> <span class='id identifier rubyid_hierarchical_clusterization'>hierarchical_clusterization</span> <span class='lparen'>(</span><span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>=</span> <span class='int'>0</span><span class='comma'>,</span> <span class='id identifier rubyid_distance_method'>distance_method</span> <span class='op'>=</span> <span class='int'>0</span><span class='comma'>,</span> <span class='id identifier rubyid_vec'>vec</span> <span class='op'>=</span> <span class='ivar'>@values</span><span class='comma'>,</span> <span class='id identifier rubyid_debug'>debug</span> <span class='op'>=</span> <span class='kw'>false</span><span class='rparen'>)</span>
571
- <span class='id identifier rubyid_clusters'>clusters</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
572
- <span class='id identifier rubyid_vec'>vec</span> <span class='op'>=</span> <span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_sort'>sort</span>
573
-
574
- <span class='kw'>if</span> <span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='int'>1</span>
575
- <span class='id identifier rubyid_hash'>hash</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='id identifier rubyid_vec'>vec</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='op'>=&gt;</span><span class='int'>1</span><span class='rbrace'>}</span>
576
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='const'>Cluster</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_hash'>hash</span><span class='rparen'>)</span>
577
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rparen'>)</span>
578
- <span class='id identifier rubyid_clusters'>clusters</span>
579
- <span class='kw'>end</span>
580
-
581
- <span class='comment'># Thresholds
582
- </span> <span class='id identifier rubyid_threshold_distance'>threshold_distance</span> <span class='op'>=</span> <span class='lparen'>(</span><span class='float'>0.25</span> <span class='op'>*</span> <span class='lparen'>(</span><span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_max'>max</span><span class='op'>-</span><span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_min'>min</span><span class='rparen'>)</span><span class='rparen'>)</span>
583
- <span class='id identifier rubyid_threshold_density'>threshold_density</span> <span class='op'>=</span> <span class='lparen'>(</span><span class='float'>0.5</span> <span class='op'>*</span> <span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_to_i'>to_i</span>
584
-
585
- <span class='comment'># make a histogram from the input vector
586
- </span> <span class='id identifier rubyid_histogram'>histogram</span> <span class='op'>=</span> <span class='const'>Hash</span><span class='lbracket'>[</span><span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_group_by'>group_by</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_x'>x</span><span class='op'>|</span> <span class='id identifier rubyid_x'>x</span> <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_map'>map</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span> <span class='id identifier rubyid_vs'>vs</span><span class='op'>|</span> <span class='lbracket'>[</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span> <span class='id identifier rubyid_vs'>vs</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbracket'>]</span> <span class='rbrace'>}</span><span class='rbracket'>]</span>
587
-
588
- <span class='comment'># clusters = array of clusters
589
- </span> <span class='comment'>#initially each length belongs to a different cluster
590
- </span> <span class='id identifier rubyid_histogram'>histogram</span><span class='period'>.</span><span class='id identifier rubyid_sort'>sort</span> <span class='lbrace'>{</span><span class='op'>|</span><span class='id identifier rubyid_a'>a</span><span class='comma'>,</span><span class='id identifier rubyid_b'>b</span><span class='op'>|</span> <span class='id identifier rubyid_a'>a</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='op'>&lt;=&gt;</span><span class='id identifier rubyid_b'>b</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='op'>|</span>
591
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
592
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>len </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='rbrace'>}</span><span class='tstring_content'> appears </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>1</span><span class='rbracket'>]</span><span class='rbrace'>}</span><span class='tstring_content'> times</span><span class='tstring_end'>&quot;</span></span>
593
- <span class='kw'>end</span>
594
- <span class='id identifier rubyid_hash'>hash</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span> <span class='op'>=&gt;</span> <span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>1</span><span class='rbracket'>]</span><span class='rbrace'>}</span>
595
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='const'>Cluster</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_hash'>hash</span><span class='rparen'>)</span>
596
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rparen'>)</span>
597
- <span class='kw'>end</span>
598
-
599
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
600
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='op'>|</span>
601
- <span class='id identifier rubyid_elem'>elem</span><span class='period'>.</span><span class='id identifier rubyid_print'>print</span>
602
- <span class='kw'>end</span>
603
- <span class='kw'>end</span>
604
-
605
- <span class='kw'>if</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='int'>1</span>
606
- <span class='kw'>return</span> <span class='id identifier rubyid_clusters'>clusters</span>
607
- <span class='kw'>end</span>
608
-
609
- <span class='comment'># each iteration merge the closest two adiacent cluster
610
- </span> <span class='comment'># the loop stops according to the stop conditions
611
- </span> <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>=</span> <span class='int'>0</span>
612
- <span class='id identifier rubyid_loop'>loop</span> <span class='kw'>do</span>
613
-
614
- <span class='comment'>#stop condition 1
615
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>!=</span> <span class='int'>0</span> <span class='kw'>and</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span>
616
- <span class='kw'>break</span>
617
- <span class='kw'>end</span>
618
-
619
- <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>=</span> <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>+</span> <span class='int'>1</span>
620
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
621
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>\nIteration </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_iteration'>iteration</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
622
- <span class='kw'>end</span>
623
-
624
- <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='op'>=</span> <span class='int'>100000000</span>
625
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='int'>0</span>
626
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='int'>0</span>
627
-
628
- <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='int'>0</span><span class='op'>..</span><span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='int'>2</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_item'>item</span><span class='comma'>,</span> <span class='id identifier rubyid_i'>i</span><span class='op'>|</span>
629
- <span class='id identifier rubyid_dist'>dist</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_distance'>distance</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='op'>+</span><span class='int'>1</span><span class='rbracket'>]</span><span class='comma'>,</span> <span class='id identifier rubyid_distance_method'>distance_method</span><span class='rparen'>)</span>
630
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
631
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>distance between clusters </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_i'>i</span><span class='rbrace'>}</span><span class='tstring_content'> and </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_i'>i</span><span class='op'>+</span><span class='int'>1</span><span class='rbrace'>}</span><span class='tstring_content'> is </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_dist'>dist</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
632
- <span class='kw'>end</span>
633
- <span class='id identifier rubyid_current_density'>current_density</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span> <span class='op'>+</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='op'>+</span><span class='int'>1</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span>
634
- <span class='kw'>if</span> <span class='id identifier rubyid_dist'>dist</span> <span class='op'>&lt;</span> <span class='id identifier rubyid_min_distance'>min_distance</span>
635
- <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='op'>=</span> <span class='id identifier rubyid_dist'>dist</span>
636
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='id identifier rubyid_i'>i</span>
637
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='id identifier rubyid_current_density'>current_density</span>
638
- <span class='kw'>else</span>
639
- <span class='kw'>if</span> <span class='id identifier rubyid_dist'>dist</span> <span class='op'>==</span> <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='kw'>and</span> <span class='id identifier rubyid_density'>density</span> <span class='op'>&lt;</span> <span class='id identifier rubyid_current_density'>current_density</span>
640
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='id identifier rubyid_i'>i</span>
641
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='id identifier rubyid_current_density'>current_density</span>
642
- <span class='kw'>end</span>
643
- <span class='kw'>end</span>
644
- <span class='kw'>end</span>
645
-
646
-
647
- <span class='comment'>#stop condition 2
648
- </span> <span class='comment'>#the distance between the closest clusters exceeds the threshold
649
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>==</span> <span class='int'>0</span> <span class='kw'>and</span> <span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_mean'>mean</span> <span class='op'>-</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='op'>+</span><span class='int'>1</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_mean'>mean</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_abs'>abs</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_threshold_distance'>threshold_distance</span>
650
- <span class='kw'>break</span>
651
- <span class='kw'>end</span>
652
-
653
- <span class='comment'>#merge clusters 'cluster' and 'cluster'+1
654
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
655
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>clusters to merge </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rbrace'>}</span><span class='tstring_content'> and </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_cluster'>cluster</span><span class='op'>+</span><span class='int'>1</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
656
- <span class='kw'>end</span>
657
-
658
- <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_add'>add</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='op'>+</span><span class='int'>1</span><span class='rbracket'>]</span><span class='rparen'>)</span>
659
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_delete_at'>delete_at</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster'>cluster</span><span class='op'>+</span><span class='int'>1</span><span class='rparen'>)</span>
660
-
661
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
662
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='comma'>,</span> <span class='id identifier rubyid_i'>i</span><span class='op'>|</span>
663
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>cluster </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_i'>i</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
664
- <span class='id identifier rubyid_elem'>elem</span><span class='period'>.</span><span class='id identifier rubyid_print'>print</span>
665
- <span class='kw'>end</span>
666
- <span class='kw'>end</span>
667
-
668
- <span class='comment'>#stop condition 3
669
- </span> <span class='comment'>#the density of the biggest clusters exceeds the threshold
670
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>==</span> <span class='int'>0</span> <span class='kw'>and</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_threshold_density'>threshold_density</span>
671
- <span class='kw'>break</span>
672
- <span class='kw'>end</span>
673
- <span class='kw'>end</span>
674
-
675
- <span class='ivar'>@clusters</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span>
676
- <span class='id identifier rubyid_clusters'>clusters</span>
677
- <span class='kw'>end</span></pre>
678
- </td>
679
- </tr>
680
- </table>
681
- </div>
682
-
683
- <div class="method_details ">
684
- <h3 class="signature " id="hierarchical_clusterization_2d-instance_method">
685
-
686
- - (<tt>Object</tt>) <strong>hierarchical_clusterization_2d</strong>(no_clusters = 0, distance_method = 0, vec = @values, debug = false)
687
-
688
-
689
-
690
-
691
-
692
- </h3><table class="source_code">
693
- <tr>
694
- <td>
695
- <pre class="lines">
696
-
697
-
698
- 387
699
- 388
700
- 389
701
- 390
702
- 391
703
- 392
704
- 393
705
- 394
706
- 395
707
- 396
708
- 397
709
- 398
710
- 399
711
- 400
712
- 401
713
- 402
714
- 403
715
- 404
716
- 405
717
- 406
718
- 407
719
- 408
720
- 409
721
- 410
722
- 411
723
- 412
724
- 413
725
- 414
726
- 415
727
- 416
728
- 417
729
- 418
730
- 419
731
- 420
732
- 421
733
- 422
734
- 423
735
- 424
736
- 425
737
- 426
738
- 427
739
- 428
740
- 429
741
- 430
742
- 431
743
- 432
744
- 433
745
- 434
746
- 435
747
- 436
748
- 437
749
- 438
750
- 439
751
- 440
752
- 441
753
- 442
754
- 443
755
- 444
756
- 445
757
- 446
758
- 447
759
- 448
760
- 449
761
- 450
762
- 451
763
- 452
764
- 453
765
- 454
766
- 455
767
- 456
768
- 457
769
- 458
770
- 459
771
- 460
772
- 461
773
- 462
774
- 463
775
- 464
776
- 465
777
- 466
778
- 467
779
- 468
780
- 469
781
- 470
782
- 471
783
- 472
784
- 473
785
- 474
786
- 475
787
- 476
788
- 477
789
- 478
790
- 479
791
- 480
792
- 481
793
- 482
794
- 483
795
- 484
796
- 485
797
- 486
798
- 487
799
- 488
800
- 489
801
- 490</pre>
802
- </td>
803
- <td>
804
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 387</span>
805
-
806
- <span class='kw'>def</span> <span class='id identifier rubyid_hierarchical_clusterization_2d'>hierarchical_clusterization_2d</span> <span class='lparen'>(</span><span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>=</span> <span class='int'>0</span><span class='comma'>,</span> <span class='id identifier rubyid_distance_method'>distance_method</span> <span class='op'>=</span> <span class='int'>0</span><span class='comma'>,</span> <span class='id identifier rubyid_vec'>vec</span> <span class='op'>=</span> <span class='ivar'>@values</span><span class='comma'>,</span> <span class='id identifier rubyid_debug'>debug</span> <span class='op'>=</span> <span class='kw'>false</span><span class='rparen'>)</span>
807
- <span class='id identifier rubyid_clusters'>clusters</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
808
-
809
- <span class='kw'>if</span> <span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='int'>1</span>
810
- <span class='id identifier rubyid_hash'>hash</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='id identifier rubyid_vec'>vec</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='op'>=&gt;</span><span class='int'>1</span><span class='rbrace'>}</span>
811
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='const'>PairCluster</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_hash'>hash</span><span class='rparen'>)</span>
812
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rparen'>)</span>
813
- <span class='id identifier rubyid_clusters'>clusters</span>
814
- <span class='kw'>end</span>
815
-
816
- <span class='comment'># Thresholds
817
- </span> <span class='comment'># threshold_distance = (0.25 * (vec.max-vec.min))
818
- </span> <span class='id identifier rubyid_threshold_density'>threshold_density</span> <span class='op'>=</span> <span class='lparen'>(</span><span class='float'>0.5</span> <span class='op'>*</span> <span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_to_i'>to_i</span>
819
-
820
- <span class='comment'># make a histogram from the input vector
821
- </span> <span class='id identifier rubyid_histogram'>histogram</span> <span class='op'>=</span> <span class='const'>Hash</span><span class='lbracket'>[</span><span class='id identifier rubyid_vec'>vec</span><span class='period'>.</span><span class='id identifier rubyid_group_by'>group_by</span><span class='lbrace'>{</span><span class='op'>|</span><span class='id identifier rubyid_a'>a</span><span class='op'>|</span> <span class='id identifier rubyid_a'>a</span><span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_map'>map</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span> <span class='id identifier rubyid_vs'>vs</span><span class='op'>|</span> <span class='lbracket'>[</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span> <span class='id identifier rubyid_vs'>vs</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbracket'>]</span> <span class='rbrace'>}</span><span class='rbracket'>]</span>
822
-
823
- <span class='comment'># clusters = array of clusters
824
- </span> <span class='comment'># initially each length belongs to a different cluster
825
- </span> <span class='id identifier rubyid_histogram'>histogram</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='op'>|</span>
826
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
827
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>pair (</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_x'>x</span><span class='rbrace'>}</span><span class='tstring_content'> </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_y'>y</span><span class='rbrace'>}</span><span class='tstring_content'>) appears </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>1</span><span class='rbracket'>]</span><span class='rbrace'>}</span><span class='tstring_content'> times</span><span class='tstring_end'>&quot;</span></span>
828
- <span class='kw'>end</span>
829
- <span class='id identifier rubyid_hash'>hash</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span> <span class='op'>=&gt;</span> <span class='id identifier rubyid_elem'>elem</span><span class='lbracket'>[</span><span class='int'>1</span><span class='rbracket'>]</span><span class='rbrace'>}</span>
830
- <span class='id identifier rubyid_cluster'>cluster</span> <span class='op'>=</span> <span class='const'>PairCluster</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_hash'>hash</span><span class='rparen'>)</span>
831
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rparen'>)</span>
832
- <span class='kw'>end</span>
833
-
834
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
835
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='op'>|</span>
836
- <span class='id identifier rubyid_elem'>elem</span><span class='period'>.</span><span class='id identifier rubyid_print'>print</span>
837
- <span class='kw'>end</span>
838
- <span class='kw'>end</span>
839
-
840
- <span class='kw'>if</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='int'>1</span>
841
- <span class='kw'>return</span> <span class='id identifier rubyid_clusters'>clusters</span>
842
- <span class='kw'>end</span>
843
-
844
- <span class='comment'># each iteration merge the closest two adiacent cluster
845
- </span> <span class='comment'># the loop stops according to the stop conditions
846
- </span> <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>=</span> <span class='int'>0</span>
847
- <span class='id identifier rubyid_loop'>loop</span> <span class='kw'>do</span>
848
- <span class='comment'>#stop condition 1
849
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>!=</span> <span class='int'>0</span> <span class='kw'>and</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>==</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span>
850
- <span class='kw'>break</span>
851
- <span class='kw'>end</span>
852
-
853
- <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>=</span> <span class='id identifier rubyid_iteration'>iteration</span> <span class='op'>+</span> <span class='int'>1</span>
854
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
855
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>\nIteration </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_iteration'>iteration</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
856
- <span class='kw'>end</span>
857
-
858
- <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='op'>=</span> <span class='int'>100000000</span>
859
- <span class='id identifier rubyid_cluster1'>cluster1</span> <span class='op'>=</span> <span class='int'>0</span>
860
- <span class='id identifier rubyid_cluster2'>cluster2</span> <span class='op'>=</span> <span class='int'>0</span>
861
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='int'>0</span>
862
-
863
- <span class='lbracket'>[</span><span class='op'>*</span><span class='lparen'>(</span><span class='int'>0</span><span class='op'>..</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='int'>2</span><span class='rparen'>)</span><span class='rparen'>)</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='op'>|</span>
864
- <span class='lbracket'>[</span><span class='op'>*</span><span class='lparen'>(</span><span class='lparen'>(</span><span class='id identifier rubyid_i'>i</span><span class='op'>+</span><span class='int'>1</span><span class='rparen'>)</span><span class='op'>..</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='int'>1</span><span class='rparen'>)</span><span class='rparen'>)</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_j'>j</span><span class='op'>|</span>
865
- <span class='id identifier rubyid_dist'>dist</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_distance'>distance</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_j'>j</span><span class='rbracket'>]</span><span class='comma'>,</span> <span class='id identifier rubyid_distance_method'>distance_method</span><span class='rparen'>)</span>
866
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
867
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>distance between clusters </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_i'>i</span><span class='rbrace'>}</span><span class='tstring_content'> and </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_j'>j</span><span class='rbrace'>}</span><span class='tstring_content'> is </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_dist'>dist</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
868
- <span class='kw'>end</span>
869
- <span class='id identifier rubyid_current_density'>current_density</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_i'>i</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span> <span class='op'>+</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_j'>j</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span>
870
- <span class='kw'>if</span> <span class='id identifier rubyid_dist'>dist</span> <span class='op'>&lt;</span> <span class='id identifier rubyid_min_distance'>min_distance</span>
871
- <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='op'>=</span> <span class='id identifier rubyid_dist'>dist</span>
872
- <span class='id identifier rubyid_cluster1'>cluster1</span> <span class='op'>=</span> <span class='id identifier rubyid_i'>i</span>
873
- <span class='id identifier rubyid_cluster2'>cluster2</span> <span class='op'>=</span> <span class='id identifier rubyid_j'>j</span>
874
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='id identifier rubyid_current_density'>current_density</span>
875
- <span class='kw'>else</span>
876
- <span class='kw'>if</span> <span class='id identifier rubyid_dist'>dist</span> <span class='op'>==</span> <span class='id identifier rubyid_min_distance'>min_distance</span> <span class='kw'>and</span> <span class='id identifier rubyid_density'>density</span> <span class='op'>&lt;</span> <span class='id identifier rubyid_current_density'>current_density</span>
877
- <span class='id identifier rubyid_cluster1'>cluster1</span> <span class='op'>=</span> <span class='id identifier rubyid_i'>i</span>
878
- <span class='id identifier rubyid_cluster2'>cluster2</span> <span class='op'>=</span> <span class='id identifier rubyid_j'>j</span>
879
- <span class='id identifier rubyid_density'>density</span> <span class='op'>=</span> <span class='id identifier rubyid_current_density'>current_density</span>
880
- <span class='kw'>end</span>
881
- <span class='kw'>end</span>
882
- <span class='kw'>end</span>
883
- <span class='kw'>end</span>
884
-
885
- <span class='comment'># merge clusters 'cluster1' and 'cluster2'
886
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
887
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>clusters to merge </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_cluster1'>cluster1</span><span class='rbrace'>}</span><span class='tstring_content'> and </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_cluster2'>cluster2</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
888
- <span class='kw'>end</span>
889
- <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster1'>cluster1</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_add'>add</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster2'>cluster2</span><span class='rbracket'>]</span><span class='rparen'>)</span>
890
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_delete_at'>delete_at</span><span class='lparen'>(</span><span class='id identifier rubyid_cluster2'>cluster2</span><span class='rparen'>)</span>
891
-
892
- <span class='kw'>if</span> <span class='id identifier rubyid_debug'>debug</span>
893
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_elem'>elem</span><span class='comma'>,</span> <span class='id identifier rubyid_i'>i</span><span class='op'>|</span>
894
- <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>cluster </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_i'>i</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
895
- <span class='id identifier rubyid_elem'>elem</span><span class='period'>.</span><span class='id identifier rubyid_print'>print</span>
896
- <span class='kw'>end</span>
897
- <span class='kw'>end</span>
898
-
899
- <span class='comment'>#stop condition 3
900
- </span> <span class='comment'>#the density of the biggest clusters exceeds the threshold
901
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_no_clusters'>no_clusters</span> <span class='op'>==</span> <span class='int'>0</span> <span class='kw'>and</span> <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_cluster'>cluster</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_threshold_density'>threshold_density</span>
902
- <span class='kw'>break</span>
903
- <span class='kw'>end</span>
904
- <span class='kw'>end</span>
905
-
906
- <span class='ivar'>@clusters</span> <span class='op'>=</span> <span class='id identifier rubyid_clusters'>clusters</span>
907
- <span class='id identifier rubyid_clusters'>clusters</span>
908
-
909
- <span class='kw'>end</span></pre>
910
- </td>
911
- </tr>
912
- </table>
913
- </div>
914
-
915
- <div class="method_details ">
916
- <h3 class="signature " id="most_dense_cluster-instance_method">
917
-
918
- - (<tt>Object</tt>) <strong>most_dense_cluster</strong>(clusters = @clusters)
919
-
920
-
921
-
922
-
923
-
924
- </h3><div class="docstring">
925
- <div class="discussion">
926
-
927
- <p>Returns the cluster with the maimum density Params: <tt>clusters</tt>: list
928
- of <tt>Clususter</tt> objects</p>
929
-
930
-
931
- </div>
932
- </div>
933
- <div class="tags">
934
-
935
-
936
- </div><table class="source_code">
937
- <tr>
938
- <td>
939
- <pre class="lines">
940
-
941
-
942
- 616
943
- 617
944
- 618
945
- 619
946
- 620
947
- 621
948
- 622
949
- 623
950
- 624
951
- 625
952
- 626
953
- 627
954
- 628
955
- 629
956
- 630
957
- 631</pre>
958
- </td>
959
- <td>
960
- <pre class="code"><span class="info file"># File 'lib/genevalidator/clusterization.rb', line 616</span>
961
-
962
- <span class='kw'>def</span> <span class='id identifier rubyid_most_dense_cluster'>most_dense_cluster</span><span class='lparen'>(</span><span class='id identifier rubyid_clusters'>clusters</span> <span class='op'>=</span> <span class='ivar'>@clusters</span><span class='rparen'>)</span>
963
- <span class='id identifier rubyid_max_density'>max_density</span> <span class='op'>=</span> <span class='int'>0</span><span class='semicolon'>;</span>
964
- <span class='id identifier rubyid_max_density_cluster'>max_density_cluster</span> <span class='op'>=</span> <span class='int'>0</span><span class='semicolon'>;</span>
965
-
966
- <span class='kw'>if</span> <span class='id identifier rubyid_clusters'>clusters</span> <span class='op'>==</span> <span class='kw'>nil</span>
967
- <span class='kw'>nil</span>
968
- <span class='kw'>end</span>
969
-
970
- <span class='id identifier rubyid_clusters'>clusters</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_item'>item</span><span class='comma'>,</span> <span class='id identifier rubyid_i'>i</span><span class='op'>|</span>
971
- <span class='kw'>if</span> <span class='id identifier rubyid_item'>item</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_max_density'>max_density</span>
972
- <span class='id identifier rubyid_max_density'>max_density</span> <span class='op'>=</span> <span class='id identifier rubyid_item'>item</span><span class='period'>.</span><span class='id identifier rubyid_density'>density</span>
973
- <span class='id identifier rubyid_max_density_cluster'>max_density_cluster</span> <span class='op'>=</span> <span class='id identifier rubyid_i'>i</span><span class='semicolon'>;</span>
974
- <span class='kw'>end</span>
975
- <span class='kw'>end</span>
976
- <span class='id identifier rubyid_clusters'>clusters</span><span class='lbracket'>[</span><span class='id identifier rubyid_max_density_cluster'>max_density_cluster</span><span class='rbracket'>]</span>
977
- <span class='kw'>end</span></pre>
978
- </td>
979
- </tr>
980
- </table>
981
- </div>
982
-
983
- </div>
984
-
985
- </div>
986
-
987
- <div id="footer">
988
- Generated on Sat Sep 28 07:01:33 2013 by
989
- <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
990
- 0.8.7.2 (ruby-1.9.3).
991
- </div>
992
-
993
- </body>
994
- </html>