ruby-spark 1.1.0.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +37 -0
  3. data/Gemfile +47 -0
  4. data/Guardfile +5 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +252 -0
  7. data/Rakefile +35 -0
  8. data/TODO.md +6 -0
  9. data/benchmark/aggregate.rb +33 -0
  10. data/benchmark/bisect.rb +88 -0
  11. data/benchmark/comparison/prepare.sh +18 -0
  12. data/benchmark/comparison/python.py +156 -0
  13. data/benchmark/comparison/r.r +69 -0
  14. data/benchmark/comparison/ruby.rb +167 -0
  15. data/benchmark/comparison/run-all.sh +160 -0
  16. data/benchmark/comparison/scala.scala +181 -0
  17. data/benchmark/custom_marshal.rb +94 -0
  18. data/benchmark/digest.rb +150 -0
  19. data/benchmark/enumerator.rb +88 -0
  20. data/benchmark/serializer.rb +82 -0
  21. data/benchmark/sort.rb +43 -0
  22. data/benchmark/sort2.rb +164 -0
  23. data/benchmark/take.rb +28 -0
  24. data/bin/ruby-spark +8 -0
  25. data/example/pi.rb +28 -0
  26. data/example/website_search.rb +83 -0
  27. data/ext/ruby_c/extconf.rb +3 -0
  28. data/ext/ruby_c/murmur.c +158 -0
  29. data/ext/ruby_c/murmur.h +9 -0
  30. data/ext/ruby_c/ruby-spark.c +18 -0
  31. data/ext/ruby_java/Digest.java +36 -0
  32. data/ext/ruby_java/Murmur2.java +98 -0
  33. data/ext/ruby_java/RubySparkExtService.java +28 -0
  34. data/ext/ruby_java/extconf.rb +3 -0
  35. data/ext/spark/build.sbt +73 -0
  36. data/ext/spark/project/plugins.sbt +9 -0
  37. data/ext/spark/sbt/sbt +34 -0
  38. data/ext/spark/src/main/scala/Exec.scala +91 -0
  39. data/ext/spark/src/main/scala/MLLibAPI.scala +4 -0
  40. data/ext/spark/src/main/scala/Marshal.scala +52 -0
  41. data/ext/spark/src/main/scala/MarshalDump.scala +113 -0
  42. data/ext/spark/src/main/scala/MarshalLoad.scala +220 -0
  43. data/ext/spark/src/main/scala/RubyAccumulatorParam.scala +69 -0
  44. data/ext/spark/src/main/scala/RubyBroadcast.scala +13 -0
  45. data/ext/spark/src/main/scala/RubyConstant.scala +13 -0
  46. data/ext/spark/src/main/scala/RubyMLLibAPI.scala +55 -0
  47. data/ext/spark/src/main/scala/RubyMLLibUtilAPI.scala +21 -0
  48. data/ext/spark/src/main/scala/RubyPage.scala +34 -0
  49. data/ext/spark/src/main/scala/RubyRDD.scala +392 -0
  50. data/ext/spark/src/main/scala/RubySerializer.scala +14 -0
  51. data/ext/spark/src/main/scala/RubyTab.scala +11 -0
  52. data/ext/spark/src/main/scala/RubyUtils.scala +15 -0
  53. data/ext/spark/src/main/scala/RubyWorker.scala +257 -0
  54. data/ext/spark/src/test/scala/MarshalSpec.scala +84 -0
  55. data/lib/ruby-spark.rb +1 -0
  56. data/lib/spark.rb +198 -0
  57. data/lib/spark/accumulator.rb +260 -0
  58. data/lib/spark/broadcast.rb +98 -0
  59. data/lib/spark/build.rb +43 -0
  60. data/lib/spark/cli.rb +169 -0
  61. data/lib/spark/command.rb +86 -0
  62. data/lib/spark/command/base.rb +158 -0
  63. data/lib/spark/command/basic.rb +345 -0
  64. data/lib/spark/command/pair.rb +124 -0
  65. data/lib/spark/command/sort.rb +51 -0
  66. data/lib/spark/command/statistic.rb +144 -0
  67. data/lib/spark/command_builder.rb +141 -0
  68. data/lib/spark/command_validator.rb +34 -0
  69. data/lib/spark/config.rb +238 -0
  70. data/lib/spark/constant.rb +14 -0
  71. data/lib/spark/context.rb +322 -0
  72. data/lib/spark/error.rb +50 -0
  73. data/lib/spark/ext/hash.rb +41 -0
  74. data/lib/spark/ext/integer.rb +25 -0
  75. data/lib/spark/ext/io.rb +67 -0
  76. data/lib/spark/ext/ip_socket.rb +29 -0
  77. data/lib/spark/ext/module.rb +58 -0
  78. data/lib/spark/ext/object.rb +24 -0
  79. data/lib/spark/ext/string.rb +24 -0
  80. data/lib/spark/helper.rb +10 -0
  81. data/lib/spark/helper/logger.rb +40 -0
  82. data/lib/spark/helper/parser.rb +85 -0
  83. data/lib/spark/helper/serialize.rb +71 -0
  84. data/lib/spark/helper/statistic.rb +93 -0
  85. data/lib/spark/helper/system.rb +42 -0
  86. data/lib/spark/java_bridge.rb +19 -0
  87. data/lib/spark/java_bridge/base.rb +203 -0
  88. data/lib/spark/java_bridge/jruby.rb +23 -0
  89. data/lib/spark/java_bridge/rjb.rb +41 -0
  90. data/lib/spark/logger.rb +76 -0
  91. data/lib/spark/mllib.rb +100 -0
  92. data/lib/spark/mllib/classification/common.rb +31 -0
  93. data/lib/spark/mllib/classification/logistic_regression.rb +223 -0
  94. data/lib/spark/mllib/classification/naive_bayes.rb +97 -0
  95. data/lib/spark/mllib/classification/svm.rb +135 -0
  96. data/lib/spark/mllib/clustering/gaussian_mixture.rb +82 -0
  97. data/lib/spark/mllib/clustering/kmeans.rb +118 -0
  98. data/lib/spark/mllib/matrix.rb +120 -0
  99. data/lib/spark/mllib/regression/common.rb +73 -0
  100. data/lib/spark/mllib/regression/labeled_point.rb +41 -0
  101. data/lib/spark/mllib/regression/lasso.rb +100 -0
  102. data/lib/spark/mllib/regression/linear.rb +124 -0
  103. data/lib/spark/mllib/regression/ridge.rb +97 -0
  104. data/lib/spark/mllib/ruby_matrix/matrix_adapter.rb +53 -0
  105. data/lib/spark/mllib/ruby_matrix/vector_adapter.rb +57 -0
  106. data/lib/spark/mllib/stat/distribution.rb +12 -0
  107. data/lib/spark/mllib/vector.rb +185 -0
  108. data/lib/spark/rdd.rb +1377 -0
  109. data/lib/spark/sampler.rb +92 -0
  110. data/lib/spark/serializer.rb +79 -0
  111. data/lib/spark/serializer/auto_batched.rb +59 -0
  112. data/lib/spark/serializer/base.rb +63 -0
  113. data/lib/spark/serializer/batched.rb +84 -0
  114. data/lib/spark/serializer/cartesian.rb +13 -0
  115. data/lib/spark/serializer/compressed.rb +27 -0
  116. data/lib/spark/serializer/marshal.rb +17 -0
  117. data/lib/spark/serializer/message_pack.rb +23 -0
  118. data/lib/spark/serializer/oj.rb +23 -0
  119. data/lib/spark/serializer/pair.rb +41 -0
  120. data/lib/spark/serializer/text.rb +25 -0
  121. data/lib/spark/sort.rb +189 -0
  122. data/lib/spark/stat_counter.rb +125 -0
  123. data/lib/spark/storage_level.rb +39 -0
  124. data/lib/spark/version.rb +3 -0
  125. data/lib/spark/worker/master.rb +144 -0
  126. data/lib/spark/worker/spark_files.rb +15 -0
  127. data/lib/spark/worker/worker.rb +200 -0
  128. data/ruby-spark.gemspec +47 -0
  129. data/spec/generator.rb +37 -0
  130. data/spec/inputs/lorem_300.txt +316 -0
  131. data/spec/inputs/numbers/1.txt +50 -0
  132. data/spec/inputs/numbers/10.txt +50 -0
  133. data/spec/inputs/numbers/11.txt +50 -0
  134. data/spec/inputs/numbers/12.txt +50 -0
  135. data/spec/inputs/numbers/13.txt +50 -0
  136. data/spec/inputs/numbers/14.txt +50 -0
  137. data/spec/inputs/numbers/15.txt +50 -0
  138. data/spec/inputs/numbers/16.txt +50 -0
  139. data/spec/inputs/numbers/17.txt +50 -0
  140. data/spec/inputs/numbers/18.txt +50 -0
  141. data/spec/inputs/numbers/19.txt +50 -0
  142. data/spec/inputs/numbers/2.txt +50 -0
  143. data/spec/inputs/numbers/20.txt +50 -0
  144. data/spec/inputs/numbers/3.txt +50 -0
  145. data/spec/inputs/numbers/4.txt +50 -0
  146. data/spec/inputs/numbers/5.txt +50 -0
  147. data/spec/inputs/numbers/6.txt +50 -0
  148. data/spec/inputs/numbers/7.txt +50 -0
  149. data/spec/inputs/numbers/8.txt +50 -0
  150. data/spec/inputs/numbers/9.txt +50 -0
  151. data/spec/inputs/numbers_0_100.txt +101 -0
  152. data/spec/inputs/numbers_1_100.txt +100 -0
  153. data/spec/lib/collect_spec.rb +42 -0
  154. data/spec/lib/command_spec.rb +68 -0
  155. data/spec/lib/config_spec.rb +64 -0
  156. data/spec/lib/context_spec.rb +165 -0
  157. data/spec/lib/ext_spec.rb +72 -0
  158. data/spec/lib/external_apps_spec.rb +45 -0
  159. data/spec/lib/filter_spec.rb +80 -0
  160. data/spec/lib/flat_map_spec.rb +100 -0
  161. data/spec/lib/group_spec.rb +109 -0
  162. data/spec/lib/helper_spec.rb +19 -0
  163. data/spec/lib/key_spec.rb +41 -0
  164. data/spec/lib/manipulation_spec.rb +122 -0
  165. data/spec/lib/map_partitions_spec.rb +87 -0
  166. data/spec/lib/map_spec.rb +91 -0
  167. data/spec/lib/mllib/classification_spec.rb +54 -0
  168. data/spec/lib/mllib/clustering_spec.rb +35 -0
  169. data/spec/lib/mllib/matrix_spec.rb +32 -0
  170. data/spec/lib/mllib/regression_spec.rb +116 -0
  171. data/spec/lib/mllib/vector_spec.rb +77 -0
  172. data/spec/lib/reduce_by_key_spec.rb +118 -0
  173. data/spec/lib/reduce_spec.rb +131 -0
  174. data/spec/lib/sample_spec.rb +46 -0
  175. data/spec/lib/serializer_spec.rb +88 -0
  176. data/spec/lib/sort_spec.rb +58 -0
  177. data/spec/lib/statistic_spec.rb +170 -0
  178. data/spec/lib/whole_text_files_spec.rb +33 -0
  179. data/spec/spec_helper.rb +38 -0
  180. metadata +389 -0
@@ -0,0 +1,50 @@
1
+ 551
2
+ 552
3
+ 553
4
+ 554
5
+ 555
6
+ 556
7
+ 557
8
+ 558
9
+ 559
10
+ 560
11
+ 561
12
+ 562
13
+ 563
14
+ 564
15
+ 565
16
+ 566
17
+ 567
18
+ 568
19
+ 569
20
+ 570
21
+ 571
22
+ 572
23
+ 573
24
+ 574
25
+ 575
26
+ 576
27
+ 577
28
+ 578
29
+ 579
30
+ 580
31
+ 581
32
+ 582
33
+ 583
34
+ 584
35
+ 585
36
+ 586
37
+ 587
38
+ 588
39
+ 589
40
+ 590
41
+ 591
42
+ 592
43
+ 593
44
+ 594
45
+ 595
46
+ 596
47
+ 597
48
+ 598
49
+ 599
50
+ 600
@@ -0,0 +1,50 @@
1
+ 601
2
+ 602
3
+ 603
4
+ 604
5
+ 605
6
+ 606
7
+ 607
8
+ 608
9
+ 609
10
+ 610
11
+ 611
12
+ 612
13
+ 613
14
+ 614
15
+ 615
16
+ 616
17
+ 617
18
+ 618
19
+ 619
20
+ 620
21
+ 621
22
+ 622
23
+ 623
24
+ 624
25
+ 625
26
+ 626
27
+ 627
28
+ 628
29
+ 629
30
+ 630
31
+ 631
32
+ 632
33
+ 633
34
+ 634
35
+ 635
36
+ 636
37
+ 637
38
+ 638
39
+ 639
40
+ 640
41
+ 641
42
+ 642
43
+ 643
44
+ 644
45
+ 645
46
+ 646
47
+ 647
48
+ 648
49
+ 649
50
+ 650
@@ -0,0 +1,50 @@
1
+ 651
2
+ 652
3
+ 653
4
+ 654
5
+ 655
6
+ 656
7
+ 657
8
+ 658
9
+ 659
10
+ 660
11
+ 661
12
+ 662
13
+ 663
14
+ 664
15
+ 665
16
+ 666
17
+ 667
18
+ 668
19
+ 669
20
+ 670
21
+ 671
22
+ 672
23
+ 673
24
+ 674
25
+ 675
26
+ 676
27
+ 677
28
+ 678
29
+ 679
30
+ 680
31
+ 681
32
+ 682
33
+ 683
34
+ 684
35
+ 685
36
+ 686
37
+ 687
38
+ 688
39
+ 689
40
+ 690
41
+ 691
42
+ 692
43
+ 693
44
+ 694
45
+ 695
46
+ 696
47
+ 697
48
+ 698
49
+ 699
50
+ 700
@@ -0,0 +1,50 @@
1
+ 701
2
+ 702
3
+ 703
4
+ 704
5
+ 705
6
+ 706
7
+ 707
8
+ 708
9
+ 709
10
+ 710
11
+ 711
12
+ 712
13
+ 713
14
+ 714
15
+ 715
16
+ 716
17
+ 717
18
+ 718
19
+ 719
20
+ 720
21
+ 721
22
+ 722
23
+ 723
24
+ 724
25
+ 725
26
+ 726
27
+ 727
28
+ 728
29
+ 729
30
+ 730
31
+ 731
32
+ 732
33
+ 733
34
+ 734
35
+ 735
36
+ 736
37
+ 737
38
+ 738
39
+ 739
40
+ 740
41
+ 741
42
+ 742
43
+ 743
44
+ 744
45
+ 745
46
+ 746
47
+ 747
48
+ 748
49
+ 749
50
+ 750
@@ -0,0 +1,50 @@
1
+ 751
2
+ 752
3
+ 753
4
+ 754
5
+ 755
6
+ 756
7
+ 757
8
+ 758
9
+ 759
10
+ 760
11
+ 761
12
+ 762
13
+ 763
14
+ 764
15
+ 765
16
+ 766
17
+ 767
18
+ 768
19
+ 769
20
+ 770
21
+ 771
22
+ 772
23
+ 773
24
+ 774
25
+ 775
26
+ 776
27
+ 777
28
+ 778
29
+ 779
30
+ 780
31
+ 781
32
+ 782
33
+ 783
34
+ 784
35
+ 785
36
+ 786
37
+ 787
38
+ 788
39
+ 789
40
+ 790
41
+ 791
42
+ 792
43
+ 793
44
+ 794
45
+ 795
46
+ 796
47
+ 797
48
+ 798
49
+ 799
50
+ 800
@@ -0,0 +1,50 @@
1
+ 801
2
+ 802
3
+ 803
4
+ 804
5
+ 805
6
+ 806
7
+ 807
8
+ 808
9
+ 809
10
+ 810
11
+ 811
12
+ 812
13
+ 813
14
+ 814
15
+ 815
16
+ 816
17
+ 817
18
+ 818
19
+ 819
20
+ 820
21
+ 821
22
+ 822
23
+ 823
24
+ 824
25
+ 825
26
+ 826
27
+ 827
28
+ 828
29
+ 829
30
+ 830
31
+ 831
32
+ 832
33
+ 833
34
+ 834
35
+ 835
36
+ 836
37
+ 837
38
+ 838
39
+ 839
40
+ 840
41
+ 841
42
+ 842
43
+ 843
44
+ 844
45
+ 845
46
+ 846
47
+ 847
48
+ 848
49
+ 849
50
+ 850
@@ -0,0 +1,50 @@
1
+ 851
2
+ 852
3
+ 853
4
+ 854
5
+ 855
6
+ 856
7
+ 857
8
+ 858
9
+ 859
10
+ 860
11
+ 861
12
+ 862
13
+ 863
14
+ 864
15
+ 865
16
+ 866
17
+ 867
18
+ 868
19
+ 869
20
+ 870
21
+ 871
22
+ 872
23
+ 873
24
+ 874
25
+ 875
26
+ 876
27
+ 877
28
+ 878
29
+ 879
30
+ 880
31
+ 881
32
+ 882
33
+ 883
34
+ 884
35
+ 885
36
+ 886
37
+ 887
38
+ 888
39
+ 889
40
+ 890
41
+ 891
42
+ 892
43
+ 893
44
+ 894
45
+ 895
46
+ 896
47
+ 897
48
+ 898
49
+ 899
50
+ 900
@@ -0,0 +1,50 @@
1
+ 901
2
+ 902
3
+ 903
4
+ 904
5
+ 905
6
+ 906
7
+ 907
8
+ 908
9
+ 909
10
+ 910
11
+ 911
12
+ 912
13
+ 913
14
+ 914
15
+ 915
16
+ 916
17
+ 917
18
+ 918
19
+ 919
20
+ 920
21
+ 921
22
+ 922
23
+ 923
24
+ 924
25
+ 925
26
+ 926
27
+ 927
28
+ 928
29
+ 929
30
+ 930
31
+ 931
32
+ 932
33
+ 933
34
+ 934
35
+ 935
36
+ 936
37
+ 937
38
+ 938
39
+ 939
40
+ 940
41
+ 941
42
+ 942
43
+ 943
44
+ 944
45
+ 945
46
+ 946
47
+ 947
48
+ 948
49
+ 949
50
+ 950
@@ -0,0 +1,50 @@
1
+ 51
2
+ 52
3
+ 53
4
+ 54
5
+ 55
6
+ 56
7
+ 57
8
+ 58
9
+ 59
10
+ 60
11
+ 61
12
+ 62
13
+ 63
14
+ 64
15
+ 65
16
+ 66
17
+ 67
18
+ 68
19
+ 69
20
+ 70
21
+ 71
22
+ 72
23
+ 73
24
+ 74
25
+ 75
26
+ 76
27
+ 77
28
+ 78
29
+ 79
30
+ 80
31
+ 81
32
+ 82
33
+ 83
34
+ 84
35
+ 85
36
+ 86
37
+ 87
38
+ 88
39
+ 89
40
+ 90
41
+ 91
42
+ 92
43
+ 93
44
+ 94
45
+ 95
46
+ 96
47
+ 97
48
+ 98
49
+ 99
50
+ 100