ruby-spark 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +37 -0
  3. data/Gemfile +47 -0
  4. data/Guardfile +5 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +185 -0
  7. data/Rakefile +35 -0
  8. data/TODO.md +7 -0
  9. data/benchmark/aggregate.rb +33 -0
  10. data/benchmark/bisect.rb +88 -0
  11. data/benchmark/custom_marshal.rb +94 -0
  12. data/benchmark/digest.rb +150 -0
  13. data/benchmark/enumerator.rb +88 -0
  14. data/benchmark/performance/prepare.sh +18 -0
  15. data/benchmark/performance/python.py +156 -0
  16. data/benchmark/performance/r.r +69 -0
  17. data/benchmark/performance/ruby.rb +167 -0
  18. data/benchmark/performance/run-all.sh +160 -0
  19. data/benchmark/performance/scala.scala +181 -0
  20. data/benchmark/serializer.rb +82 -0
  21. data/benchmark/sort.rb +43 -0
  22. data/benchmark/sort2.rb +164 -0
  23. data/benchmark/take.rb +28 -0
  24. data/bin/ruby-spark +8 -0
  25. data/example/pi.rb +28 -0
  26. data/ext/ruby_c/extconf.rb +3 -0
  27. data/ext/ruby_c/murmur.c +158 -0
  28. data/ext/ruby_c/murmur.h +9 -0
  29. data/ext/ruby_c/ruby-spark.c +18 -0
  30. data/ext/ruby_java/Digest.java +36 -0
  31. data/ext/ruby_java/Murmur2.java +98 -0
  32. data/ext/ruby_java/RubySparkExtService.java +28 -0
  33. data/ext/ruby_java/extconf.rb +3 -0
  34. data/ext/spark/build.sbt +73 -0
  35. data/ext/spark/project/plugins.sbt +9 -0
  36. data/ext/spark/sbt/sbt +34 -0
  37. data/ext/spark/src/main/scala/Exec.scala +91 -0
  38. data/ext/spark/src/main/scala/MLLibAPI.scala +4 -0
  39. data/ext/spark/src/main/scala/Marshal.scala +52 -0
  40. data/ext/spark/src/main/scala/MarshalDump.scala +113 -0
  41. data/ext/spark/src/main/scala/MarshalLoad.scala +220 -0
  42. data/ext/spark/src/main/scala/RubyAccumulatorParam.scala +69 -0
  43. data/ext/spark/src/main/scala/RubyBroadcast.scala +13 -0
  44. data/ext/spark/src/main/scala/RubyConstant.scala +13 -0
  45. data/ext/spark/src/main/scala/RubyMLLibAPI.scala +55 -0
  46. data/ext/spark/src/main/scala/RubyMLLibUtilAPI.scala +21 -0
  47. data/ext/spark/src/main/scala/RubyPage.scala +34 -0
  48. data/ext/spark/src/main/scala/RubyRDD.scala +364 -0
  49. data/ext/spark/src/main/scala/RubySerializer.scala +14 -0
  50. data/ext/spark/src/main/scala/RubyTab.scala +11 -0
  51. data/ext/spark/src/main/scala/RubyUtils.scala +15 -0
  52. data/ext/spark/src/main/scala/RubyWorker.scala +257 -0
  53. data/ext/spark/src/test/scala/MarshalSpec.scala +84 -0
  54. data/lib/ruby-spark.rb +1 -0
  55. data/lib/spark.rb +198 -0
  56. data/lib/spark/accumulator.rb +260 -0
  57. data/lib/spark/broadcast.rb +98 -0
  58. data/lib/spark/build.rb +43 -0
  59. data/lib/spark/cli.rb +169 -0
  60. data/lib/spark/command.rb +86 -0
  61. data/lib/spark/command/base.rb +154 -0
  62. data/lib/spark/command/basic.rb +345 -0
  63. data/lib/spark/command/pair.rb +124 -0
  64. data/lib/spark/command/sort.rb +51 -0
  65. data/lib/spark/command/statistic.rb +144 -0
  66. data/lib/spark/command_builder.rb +141 -0
  67. data/lib/spark/command_validator.rb +34 -0
  68. data/lib/spark/config.rb +244 -0
  69. data/lib/spark/constant.rb +14 -0
  70. data/lib/spark/context.rb +304 -0
  71. data/lib/spark/error.rb +50 -0
  72. data/lib/spark/ext/hash.rb +41 -0
  73. data/lib/spark/ext/integer.rb +25 -0
  74. data/lib/spark/ext/io.rb +57 -0
  75. data/lib/spark/ext/ip_socket.rb +29 -0
  76. data/lib/spark/ext/module.rb +58 -0
  77. data/lib/spark/ext/object.rb +24 -0
  78. data/lib/spark/ext/string.rb +24 -0
  79. data/lib/spark/helper.rb +10 -0
  80. data/lib/spark/helper/logger.rb +40 -0
  81. data/lib/spark/helper/parser.rb +85 -0
  82. data/lib/spark/helper/serialize.rb +71 -0
  83. data/lib/spark/helper/statistic.rb +93 -0
  84. data/lib/spark/helper/system.rb +42 -0
  85. data/lib/spark/java_bridge.rb +19 -0
  86. data/lib/spark/java_bridge/base.rb +203 -0
  87. data/lib/spark/java_bridge/jruby.rb +23 -0
  88. data/lib/spark/java_bridge/rjb.rb +41 -0
  89. data/lib/spark/logger.rb +76 -0
  90. data/lib/spark/mllib.rb +100 -0
  91. data/lib/spark/mllib/classification/common.rb +31 -0
  92. data/lib/spark/mllib/classification/logistic_regression.rb +223 -0
  93. data/lib/spark/mllib/classification/naive_bayes.rb +97 -0
  94. data/lib/spark/mllib/classification/svm.rb +135 -0
  95. data/lib/spark/mllib/clustering/gaussian_mixture.rb +82 -0
  96. data/lib/spark/mllib/clustering/kmeans.rb +118 -0
  97. data/lib/spark/mllib/matrix.rb +120 -0
  98. data/lib/spark/mllib/regression/common.rb +73 -0
  99. data/lib/spark/mllib/regression/labeled_point.rb +41 -0
  100. data/lib/spark/mllib/regression/lasso.rb +100 -0
  101. data/lib/spark/mllib/regression/linear.rb +124 -0
  102. data/lib/spark/mllib/regression/ridge.rb +97 -0
  103. data/lib/spark/mllib/ruby_matrix/matrix_adapter.rb +53 -0
  104. data/lib/spark/mllib/ruby_matrix/vector_adapter.rb +57 -0
  105. data/lib/spark/mllib/stat/distribution.rb +12 -0
  106. data/lib/spark/mllib/vector.rb +185 -0
  107. data/lib/spark/rdd.rb +1328 -0
  108. data/lib/spark/sampler.rb +92 -0
  109. data/lib/spark/serializer.rb +24 -0
  110. data/lib/spark/serializer/base.rb +170 -0
  111. data/lib/spark/serializer/cartesian.rb +37 -0
  112. data/lib/spark/serializer/marshal.rb +19 -0
  113. data/lib/spark/serializer/message_pack.rb +25 -0
  114. data/lib/spark/serializer/oj.rb +25 -0
  115. data/lib/spark/serializer/pair.rb +27 -0
  116. data/lib/spark/serializer/utf8.rb +25 -0
  117. data/lib/spark/sort.rb +189 -0
  118. data/lib/spark/stat_counter.rb +125 -0
  119. data/lib/spark/storage_level.rb +39 -0
  120. data/lib/spark/version.rb +3 -0
  121. data/lib/spark/worker/master.rb +144 -0
  122. data/lib/spark/worker/spark_files.rb +15 -0
  123. data/lib/spark/worker/worker.rb +197 -0
  124. data/ruby-spark.gemspec +36 -0
  125. data/spec/generator.rb +37 -0
  126. data/spec/inputs/lorem_300.txt +316 -0
  127. data/spec/inputs/numbers/1.txt +50 -0
  128. data/spec/inputs/numbers/10.txt +50 -0
  129. data/spec/inputs/numbers/11.txt +50 -0
  130. data/spec/inputs/numbers/12.txt +50 -0
  131. data/spec/inputs/numbers/13.txt +50 -0
  132. data/spec/inputs/numbers/14.txt +50 -0
  133. data/spec/inputs/numbers/15.txt +50 -0
  134. data/spec/inputs/numbers/16.txt +50 -0
  135. data/spec/inputs/numbers/17.txt +50 -0
  136. data/spec/inputs/numbers/18.txt +50 -0
  137. data/spec/inputs/numbers/19.txt +50 -0
  138. data/spec/inputs/numbers/2.txt +50 -0
  139. data/spec/inputs/numbers/20.txt +50 -0
  140. data/spec/inputs/numbers/3.txt +50 -0
  141. data/spec/inputs/numbers/4.txt +50 -0
  142. data/spec/inputs/numbers/5.txt +50 -0
  143. data/spec/inputs/numbers/6.txt +50 -0
  144. data/spec/inputs/numbers/7.txt +50 -0
  145. data/spec/inputs/numbers/8.txt +50 -0
  146. data/spec/inputs/numbers/9.txt +50 -0
  147. data/spec/inputs/numbers_0_100.txt +101 -0
  148. data/spec/inputs/numbers_1_100.txt +100 -0
  149. data/spec/lib/collect_spec.rb +42 -0
  150. data/spec/lib/command_spec.rb +68 -0
  151. data/spec/lib/config_spec.rb +64 -0
  152. data/spec/lib/context_spec.rb +163 -0
  153. data/spec/lib/ext_spec.rb +72 -0
  154. data/spec/lib/external_apps_spec.rb +45 -0
  155. data/spec/lib/filter_spec.rb +80 -0
  156. data/spec/lib/flat_map_spec.rb +100 -0
  157. data/spec/lib/group_spec.rb +109 -0
  158. data/spec/lib/helper_spec.rb +19 -0
  159. data/spec/lib/key_spec.rb +41 -0
  160. data/spec/lib/manipulation_spec.rb +114 -0
  161. data/spec/lib/map_partitions_spec.rb +87 -0
  162. data/spec/lib/map_spec.rb +91 -0
  163. data/spec/lib/mllib/classification_spec.rb +54 -0
  164. data/spec/lib/mllib/clustering_spec.rb +35 -0
  165. data/spec/lib/mllib/matrix_spec.rb +32 -0
  166. data/spec/lib/mllib/regression_spec.rb +116 -0
  167. data/spec/lib/mllib/vector_spec.rb +77 -0
  168. data/spec/lib/reduce_by_key_spec.rb +118 -0
  169. data/spec/lib/reduce_spec.rb +131 -0
  170. data/spec/lib/sample_spec.rb +46 -0
  171. data/spec/lib/serializer_spec.rb +13 -0
  172. data/spec/lib/sort_spec.rb +58 -0
  173. data/spec/lib/statistic_spec.rb +168 -0
  174. data/spec/lib/whole_text_files_spec.rb +33 -0
  175. data/spec/spec_helper.rb +39 -0
  176. metadata +301 -0
@@ -0,0 +1,50 @@
1
+ 1
2
+ 2
3
+ 3
4
+ 4
5
+ 5
6
+ 6
7
+ 7
8
+ 8
9
+ 9
10
+ 10
11
+ 11
12
+ 12
13
+ 13
14
+ 14
15
+ 15
16
+ 16
17
+ 17
18
+ 18
19
+ 19
20
+ 20
21
+ 21
22
+ 22
23
+ 23
24
+ 24
25
+ 25
26
+ 26
27
+ 27
28
+ 28
29
+ 29
30
+ 30
31
+ 31
32
+ 32
33
+ 33
34
+ 34
35
+ 35
36
+ 36
37
+ 37
38
+ 38
39
+ 39
40
+ 40
41
+ 41
42
+ 42
43
+ 43
44
+ 44
45
+ 45
46
+ 46
47
+ 47
48
+ 48
49
+ 49
50
+ 50
@@ -0,0 +1,50 @@
1
+ 451
2
+ 452
3
+ 453
4
+ 454
5
+ 455
6
+ 456
7
+ 457
8
+ 458
9
+ 459
10
+ 460
11
+ 461
12
+ 462
13
+ 463
14
+ 464
15
+ 465
16
+ 466
17
+ 467
18
+ 468
19
+ 469
20
+ 470
21
+ 471
22
+ 472
23
+ 473
24
+ 474
25
+ 475
26
+ 476
27
+ 477
28
+ 478
29
+ 479
30
+ 480
31
+ 481
32
+ 482
33
+ 483
34
+ 484
35
+ 485
36
+ 486
37
+ 487
38
+ 488
39
+ 489
40
+ 490
41
+ 491
42
+ 492
43
+ 493
44
+ 494
45
+ 495
46
+ 496
47
+ 497
48
+ 498
49
+ 499
50
+ 500
@@ -0,0 +1,50 @@
1
+ 501
2
+ 502
3
+ 503
4
+ 504
5
+ 505
6
+ 506
7
+ 507
8
+ 508
9
+ 509
10
+ 510
11
+ 511
12
+ 512
13
+ 513
14
+ 514
15
+ 515
16
+ 516
17
+ 517
18
+ 518
19
+ 519
20
+ 520
21
+ 521
22
+ 522
23
+ 523
24
+ 524
25
+ 525
26
+ 526
27
+ 527
28
+ 528
29
+ 529
30
+ 530
31
+ 531
32
+ 532
33
+ 533
34
+ 534
35
+ 535
36
+ 536
37
+ 537
38
+ 538
39
+ 539
40
+ 540
41
+ 541
42
+ 542
43
+ 543
44
+ 544
45
+ 545
46
+ 546
47
+ 547
48
+ 548
49
+ 549
50
+ 550
@@ -0,0 +1,50 @@
1
+ 551
2
+ 552
3
+ 553
4
+ 554
5
+ 555
6
+ 556
7
+ 557
8
+ 558
9
+ 559
10
+ 560
11
+ 561
12
+ 562
13
+ 563
14
+ 564
15
+ 565
16
+ 566
17
+ 567
18
+ 568
19
+ 569
20
+ 570
21
+ 571
22
+ 572
23
+ 573
24
+ 574
25
+ 575
26
+ 576
27
+ 577
28
+ 578
29
+ 579
30
+ 580
31
+ 581
32
+ 582
33
+ 583
34
+ 584
35
+ 585
36
+ 586
37
+ 587
38
+ 588
39
+ 589
40
+ 590
41
+ 591
42
+ 592
43
+ 593
44
+ 594
45
+ 595
46
+ 596
47
+ 597
48
+ 598
49
+ 599
50
+ 600
@@ -0,0 +1,50 @@
1
+ 601
2
+ 602
3
+ 603
4
+ 604
5
+ 605
6
+ 606
7
+ 607
8
+ 608
9
+ 609
10
+ 610
11
+ 611
12
+ 612
13
+ 613
14
+ 614
15
+ 615
16
+ 616
17
+ 617
18
+ 618
19
+ 619
20
+ 620
21
+ 621
22
+ 622
23
+ 623
24
+ 624
25
+ 625
26
+ 626
27
+ 627
28
+ 628
29
+ 629
30
+ 630
31
+ 631
32
+ 632
33
+ 633
34
+ 634
35
+ 635
36
+ 636
37
+ 637
38
+ 638
39
+ 639
40
+ 640
41
+ 641
42
+ 642
43
+ 643
44
+ 644
45
+ 645
46
+ 646
47
+ 647
48
+ 648
49
+ 649
50
+ 650
@@ -0,0 +1,50 @@
1
+ 651
2
+ 652
3
+ 653
4
+ 654
5
+ 655
6
+ 656
7
+ 657
8
+ 658
9
+ 659
10
+ 660
11
+ 661
12
+ 662
13
+ 663
14
+ 664
15
+ 665
16
+ 666
17
+ 667
18
+ 668
19
+ 669
20
+ 670
21
+ 671
22
+ 672
23
+ 673
24
+ 674
25
+ 675
26
+ 676
27
+ 677
28
+ 678
29
+ 679
30
+ 680
31
+ 681
32
+ 682
33
+ 683
34
+ 684
35
+ 685
36
+ 686
37
+ 687
38
+ 688
39
+ 689
40
+ 690
41
+ 691
42
+ 692
43
+ 693
44
+ 694
45
+ 695
46
+ 696
47
+ 697
48
+ 698
49
+ 699
50
+ 700
@@ -0,0 +1,50 @@
1
+ 701
2
+ 702
3
+ 703
4
+ 704
5
+ 705
6
+ 706
7
+ 707
8
+ 708
9
+ 709
10
+ 710
11
+ 711
12
+ 712
13
+ 713
14
+ 714
15
+ 715
16
+ 716
17
+ 717
18
+ 718
19
+ 719
20
+ 720
21
+ 721
22
+ 722
23
+ 723
24
+ 724
25
+ 725
26
+ 726
27
+ 727
28
+ 728
29
+ 729
30
+ 730
31
+ 731
32
+ 732
33
+ 733
34
+ 734
35
+ 735
36
+ 736
37
+ 737
38
+ 738
39
+ 739
40
+ 740
41
+ 741
42
+ 742
43
+ 743
44
+ 744
45
+ 745
46
+ 746
47
+ 747
48
+ 748
49
+ 749
50
+ 750
@@ -0,0 +1,50 @@
1
+ 751
2
+ 752
3
+ 753
4
+ 754
5
+ 755
6
+ 756
7
+ 757
8
+ 758
9
+ 759
10
+ 760
11
+ 761
12
+ 762
13
+ 763
14
+ 764
15
+ 765
16
+ 766
17
+ 767
18
+ 768
19
+ 769
20
+ 770
21
+ 771
22
+ 772
23
+ 773
24
+ 774
25
+ 775
26
+ 776
27
+ 777
28
+ 778
29
+ 779
30
+ 780
31
+ 781
32
+ 782
33
+ 783
34
+ 784
35
+ 785
36
+ 786
37
+ 787
38
+ 788
39
+ 789
40
+ 790
41
+ 791
42
+ 792
43
+ 793
44
+ 794
45
+ 795
46
+ 796
47
+ 797
48
+ 798
49
+ 799
50
+ 800
@@ -0,0 +1,50 @@
1
+ 801
2
+ 802
3
+ 803
4
+ 804
5
+ 805
6
+ 806
7
+ 807
8
+ 808
9
+ 809
10
+ 810
11
+ 811
12
+ 812
13
+ 813
14
+ 814
15
+ 815
16
+ 816
17
+ 817
18
+ 818
19
+ 819
20
+ 820
21
+ 821
22
+ 822
23
+ 823
24
+ 824
25
+ 825
26
+ 826
27
+ 827
28
+ 828
29
+ 829
30
+ 830
31
+ 831
32
+ 832
33
+ 833
34
+ 834
35
+ 835
36
+ 836
37
+ 837
38
+ 838
39
+ 839
40
+ 840
41
+ 841
42
+ 842
43
+ 843
44
+ 844
45
+ 845
46
+ 846
47
+ 847
48
+ 848
49
+ 849
50
+ 850