ruby-spark 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +37 -0
  3. data/Gemfile +47 -0
  4. data/Guardfile +5 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +185 -0
  7. data/Rakefile +35 -0
  8. data/TODO.md +7 -0
  9. data/benchmark/aggregate.rb +33 -0
  10. data/benchmark/bisect.rb +88 -0
  11. data/benchmark/custom_marshal.rb +94 -0
  12. data/benchmark/digest.rb +150 -0
  13. data/benchmark/enumerator.rb +88 -0
  14. data/benchmark/performance/prepare.sh +18 -0
  15. data/benchmark/performance/python.py +156 -0
  16. data/benchmark/performance/r.r +69 -0
  17. data/benchmark/performance/ruby.rb +167 -0
  18. data/benchmark/performance/run-all.sh +160 -0
  19. data/benchmark/performance/scala.scala +181 -0
  20. data/benchmark/serializer.rb +82 -0
  21. data/benchmark/sort.rb +43 -0
  22. data/benchmark/sort2.rb +164 -0
  23. data/benchmark/take.rb +28 -0
  24. data/bin/ruby-spark +8 -0
  25. data/example/pi.rb +28 -0
  26. data/ext/ruby_c/extconf.rb +3 -0
  27. data/ext/ruby_c/murmur.c +158 -0
  28. data/ext/ruby_c/murmur.h +9 -0
  29. data/ext/ruby_c/ruby-spark.c +18 -0
  30. data/ext/ruby_java/Digest.java +36 -0
  31. data/ext/ruby_java/Murmur2.java +98 -0
  32. data/ext/ruby_java/RubySparkExtService.java +28 -0
  33. data/ext/ruby_java/extconf.rb +3 -0
  34. data/ext/spark/build.sbt +73 -0
  35. data/ext/spark/project/plugins.sbt +9 -0
  36. data/ext/spark/sbt/sbt +34 -0
  37. data/ext/spark/src/main/scala/Exec.scala +91 -0
  38. data/ext/spark/src/main/scala/MLLibAPI.scala +4 -0
  39. data/ext/spark/src/main/scala/Marshal.scala +52 -0
  40. data/ext/spark/src/main/scala/MarshalDump.scala +113 -0
  41. data/ext/spark/src/main/scala/MarshalLoad.scala +220 -0
  42. data/ext/spark/src/main/scala/RubyAccumulatorParam.scala +69 -0
  43. data/ext/spark/src/main/scala/RubyBroadcast.scala +13 -0
  44. data/ext/spark/src/main/scala/RubyConstant.scala +13 -0
  45. data/ext/spark/src/main/scala/RubyMLLibAPI.scala +55 -0
  46. data/ext/spark/src/main/scala/RubyMLLibUtilAPI.scala +21 -0
  47. data/ext/spark/src/main/scala/RubyPage.scala +34 -0
  48. data/ext/spark/src/main/scala/RubyRDD.scala +364 -0
  49. data/ext/spark/src/main/scala/RubySerializer.scala +14 -0
  50. data/ext/spark/src/main/scala/RubyTab.scala +11 -0
  51. data/ext/spark/src/main/scala/RubyUtils.scala +15 -0
  52. data/ext/spark/src/main/scala/RubyWorker.scala +257 -0
  53. data/ext/spark/src/test/scala/MarshalSpec.scala +84 -0
  54. data/lib/ruby-spark.rb +1 -0
  55. data/lib/spark.rb +198 -0
  56. data/lib/spark/accumulator.rb +260 -0
  57. data/lib/spark/broadcast.rb +98 -0
  58. data/lib/spark/build.rb +43 -0
  59. data/lib/spark/cli.rb +169 -0
  60. data/lib/spark/command.rb +86 -0
  61. data/lib/spark/command/base.rb +154 -0
  62. data/lib/spark/command/basic.rb +345 -0
  63. data/lib/spark/command/pair.rb +124 -0
  64. data/lib/spark/command/sort.rb +51 -0
  65. data/lib/spark/command/statistic.rb +144 -0
  66. data/lib/spark/command_builder.rb +141 -0
  67. data/lib/spark/command_validator.rb +34 -0
  68. data/lib/spark/config.rb +244 -0
  69. data/lib/spark/constant.rb +14 -0
  70. data/lib/spark/context.rb +304 -0
  71. data/lib/spark/error.rb +50 -0
  72. data/lib/spark/ext/hash.rb +41 -0
  73. data/lib/spark/ext/integer.rb +25 -0
  74. data/lib/spark/ext/io.rb +57 -0
  75. data/lib/spark/ext/ip_socket.rb +29 -0
  76. data/lib/spark/ext/module.rb +58 -0
  77. data/lib/spark/ext/object.rb +24 -0
  78. data/lib/spark/ext/string.rb +24 -0
  79. data/lib/spark/helper.rb +10 -0
  80. data/lib/spark/helper/logger.rb +40 -0
  81. data/lib/spark/helper/parser.rb +85 -0
  82. data/lib/spark/helper/serialize.rb +71 -0
  83. data/lib/spark/helper/statistic.rb +93 -0
  84. data/lib/spark/helper/system.rb +42 -0
  85. data/lib/spark/java_bridge.rb +19 -0
  86. data/lib/spark/java_bridge/base.rb +203 -0
  87. data/lib/spark/java_bridge/jruby.rb +23 -0
  88. data/lib/spark/java_bridge/rjb.rb +41 -0
  89. data/lib/spark/logger.rb +76 -0
  90. data/lib/spark/mllib.rb +100 -0
  91. data/lib/spark/mllib/classification/common.rb +31 -0
  92. data/lib/spark/mllib/classification/logistic_regression.rb +223 -0
  93. data/lib/spark/mllib/classification/naive_bayes.rb +97 -0
  94. data/lib/spark/mllib/classification/svm.rb +135 -0
  95. data/lib/spark/mllib/clustering/gaussian_mixture.rb +82 -0
  96. data/lib/spark/mllib/clustering/kmeans.rb +118 -0
  97. data/lib/spark/mllib/matrix.rb +120 -0
  98. data/lib/spark/mllib/regression/common.rb +73 -0
  99. data/lib/spark/mllib/regression/labeled_point.rb +41 -0
  100. data/lib/spark/mllib/regression/lasso.rb +100 -0
  101. data/lib/spark/mllib/regression/linear.rb +124 -0
  102. data/lib/spark/mllib/regression/ridge.rb +97 -0
  103. data/lib/spark/mllib/ruby_matrix/matrix_adapter.rb +53 -0
  104. data/lib/spark/mllib/ruby_matrix/vector_adapter.rb +57 -0
  105. data/lib/spark/mllib/stat/distribution.rb +12 -0
  106. data/lib/spark/mllib/vector.rb +185 -0
  107. data/lib/spark/rdd.rb +1328 -0
  108. data/lib/spark/sampler.rb +92 -0
  109. data/lib/spark/serializer.rb +24 -0
  110. data/lib/spark/serializer/base.rb +170 -0
  111. data/lib/spark/serializer/cartesian.rb +37 -0
  112. data/lib/spark/serializer/marshal.rb +19 -0
  113. data/lib/spark/serializer/message_pack.rb +25 -0
  114. data/lib/spark/serializer/oj.rb +25 -0
  115. data/lib/spark/serializer/pair.rb +27 -0
  116. data/lib/spark/serializer/utf8.rb +25 -0
  117. data/lib/spark/sort.rb +189 -0
  118. data/lib/spark/stat_counter.rb +125 -0
  119. data/lib/spark/storage_level.rb +39 -0
  120. data/lib/spark/version.rb +3 -0
  121. data/lib/spark/worker/master.rb +144 -0
  122. data/lib/spark/worker/spark_files.rb +15 -0
  123. data/lib/spark/worker/worker.rb +197 -0
  124. data/ruby-spark.gemspec +36 -0
  125. data/spec/generator.rb +37 -0
  126. data/spec/inputs/lorem_300.txt +316 -0
  127. data/spec/inputs/numbers/1.txt +50 -0
  128. data/spec/inputs/numbers/10.txt +50 -0
  129. data/spec/inputs/numbers/11.txt +50 -0
  130. data/spec/inputs/numbers/12.txt +50 -0
  131. data/spec/inputs/numbers/13.txt +50 -0
  132. data/spec/inputs/numbers/14.txt +50 -0
  133. data/spec/inputs/numbers/15.txt +50 -0
  134. data/spec/inputs/numbers/16.txt +50 -0
  135. data/spec/inputs/numbers/17.txt +50 -0
  136. data/spec/inputs/numbers/18.txt +50 -0
  137. data/spec/inputs/numbers/19.txt +50 -0
  138. data/spec/inputs/numbers/2.txt +50 -0
  139. data/spec/inputs/numbers/20.txt +50 -0
  140. data/spec/inputs/numbers/3.txt +50 -0
  141. data/spec/inputs/numbers/4.txt +50 -0
  142. data/spec/inputs/numbers/5.txt +50 -0
  143. data/spec/inputs/numbers/6.txt +50 -0
  144. data/spec/inputs/numbers/7.txt +50 -0
  145. data/spec/inputs/numbers/8.txt +50 -0
  146. data/spec/inputs/numbers/9.txt +50 -0
  147. data/spec/inputs/numbers_0_100.txt +101 -0
  148. data/spec/inputs/numbers_1_100.txt +100 -0
  149. data/spec/lib/collect_spec.rb +42 -0
  150. data/spec/lib/command_spec.rb +68 -0
  151. data/spec/lib/config_spec.rb +64 -0
  152. data/spec/lib/context_spec.rb +163 -0
  153. data/spec/lib/ext_spec.rb +72 -0
  154. data/spec/lib/external_apps_spec.rb +45 -0
  155. data/spec/lib/filter_spec.rb +80 -0
  156. data/spec/lib/flat_map_spec.rb +100 -0
  157. data/spec/lib/group_spec.rb +109 -0
  158. data/spec/lib/helper_spec.rb +19 -0
  159. data/spec/lib/key_spec.rb +41 -0
  160. data/spec/lib/manipulation_spec.rb +114 -0
  161. data/spec/lib/map_partitions_spec.rb +87 -0
  162. data/spec/lib/map_spec.rb +91 -0
  163. data/spec/lib/mllib/classification_spec.rb +54 -0
  164. data/spec/lib/mllib/clustering_spec.rb +35 -0
  165. data/spec/lib/mllib/matrix_spec.rb +32 -0
  166. data/spec/lib/mllib/regression_spec.rb +116 -0
  167. data/spec/lib/mllib/vector_spec.rb +77 -0
  168. data/spec/lib/reduce_by_key_spec.rb +118 -0
  169. data/spec/lib/reduce_spec.rb +131 -0
  170. data/spec/lib/sample_spec.rb +46 -0
  171. data/spec/lib/serializer_spec.rb +13 -0
  172. data/spec/lib/sort_spec.rb +58 -0
  173. data/spec/lib/statistic_spec.rb +168 -0
  174. data/spec/lib/whole_text_files_spec.rb +33 -0
  175. data/spec/spec_helper.rb +39 -0
  176. metadata +301 -0
@@ -0,0 +1,50 @@
1
+ 851
2
+ 852
3
+ 853
4
+ 854
5
+ 855
6
+ 856
7
+ 857
8
+ 858
9
+ 859
10
+ 860
11
+ 861
12
+ 862
13
+ 863
14
+ 864
15
+ 865
16
+ 866
17
+ 867
18
+ 868
19
+ 869
20
+ 870
21
+ 871
22
+ 872
23
+ 873
24
+ 874
25
+ 875
26
+ 876
27
+ 877
28
+ 878
29
+ 879
30
+ 880
31
+ 881
32
+ 882
33
+ 883
34
+ 884
35
+ 885
36
+ 886
37
+ 887
38
+ 888
39
+ 889
40
+ 890
41
+ 891
42
+ 892
43
+ 893
44
+ 894
45
+ 895
46
+ 896
47
+ 897
48
+ 898
49
+ 899
50
+ 900
@@ -0,0 +1,50 @@
1
+ 901
2
+ 902
3
+ 903
4
+ 904
5
+ 905
6
+ 906
7
+ 907
8
+ 908
9
+ 909
10
+ 910
11
+ 911
12
+ 912
13
+ 913
14
+ 914
15
+ 915
16
+ 916
17
+ 917
18
+ 918
19
+ 919
20
+ 920
21
+ 921
22
+ 922
23
+ 923
24
+ 924
25
+ 925
26
+ 926
27
+ 927
28
+ 928
29
+ 929
30
+ 930
31
+ 931
32
+ 932
33
+ 933
34
+ 934
35
+ 935
36
+ 936
37
+ 937
38
+ 938
39
+ 939
40
+ 940
41
+ 941
42
+ 942
43
+ 943
44
+ 944
45
+ 945
46
+ 946
47
+ 947
48
+ 948
49
+ 949
50
+ 950
@@ -0,0 +1,50 @@
1
+ 51
2
+ 52
3
+ 53
4
+ 54
5
+ 55
6
+ 56
7
+ 57
8
+ 58
9
+ 59
10
+ 60
11
+ 61
12
+ 62
13
+ 63
14
+ 64
15
+ 65
16
+ 66
17
+ 67
18
+ 68
19
+ 69
20
+ 70
21
+ 71
22
+ 72
23
+ 73
24
+ 74
25
+ 75
26
+ 76
27
+ 77
28
+ 78
29
+ 79
30
+ 80
31
+ 81
32
+ 82
33
+ 83
34
+ 84
35
+ 85
36
+ 86
37
+ 87
38
+ 88
39
+ 89
40
+ 90
41
+ 91
42
+ 92
43
+ 93
44
+ 94
45
+ 95
46
+ 96
47
+ 97
48
+ 98
49
+ 99
50
+ 100
@@ -0,0 +1,50 @@
1
+ 951
2
+ 952
3
+ 953
4
+ 954
5
+ 955
6
+ 956
7
+ 957
8
+ 958
9
+ 959
10
+ 960
11
+ 961
12
+ 962
13
+ 963
14
+ 964
15
+ 965
16
+ 966
17
+ 967
18
+ 968
19
+ 969
20
+ 970
21
+ 971
22
+ 972
23
+ 973
24
+ 974
25
+ 975
26
+ 976
27
+ 977
28
+ 978
29
+ 979
30
+ 980
31
+ 981
32
+ 982
33
+ 983
34
+ 984
35
+ 985
36
+ 986
37
+ 987
38
+ 988
39
+ 989
40
+ 990
41
+ 991
42
+ 992
43
+ 993
44
+ 994
45
+ 995
46
+ 996
47
+ 997
48
+ 998
49
+ 999
50
+ 1000
@@ -0,0 +1,50 @@
1
+ 101
2
+ 102
3
+ 103
4
+ 104
5
+ 105
6
+ 106
7
+ 107
8
+ 108
9
+ 109
10
+ 110
11
+ 111
12
+ 112
13
+ 113
14
+ 114
15
+ 115
16
+ 116
17
+ 117
18
+ 118
19
+ 119
20
+ 120
21
+ 121
22
+ 122
23
+ 123
24
+ 124
25
+ 125
26
+ 126
27
+ 127
28
+ 128
29
+ 129
30
+ 130
31
+ 131
32
+ 132
33
+ 133
34
+ 134
35
+ 135
36
+ 136
37
+ 137
38
+ 138
39
+ 139
40
+ 140
41
+ 141
42
+ 142
43
+ 143
44
+ 144
45
+ 145
46
+ 146
47
+ 147
48
+ 148
49
+ 149
50
+ 150
@@ -0,0 +1,50 @@
1
+ 151
2
+ 152
3
+ 153
4
+ 154
5
+ 155
6
+ 156
7
+ 157
8
+ 158
9
+ 159
10
+ 160
11
+ 161
12
+ 162
13
+ 163
14
+ 164
15
+ 165
16
+ 166
17
+ 167
18
+ 168
19
+ 169
20
+ 170
21
+ 171
22
+ 172
23
+ 173
24
+ 174
25
+ 175
26
+ 176
27
+ 177
28
+ 178
29
+ 179
30
+ 180
31
+ 181
32
+ 182
33
+ 183
34
+ 184
35
+ 185
36
+ 186
37
+ 187
38
+ 188
39
+ 189
40
+ 190
41
+ 191
42
+ 192
43
+ 193
44
+ 194
45
+ 195
46
+ 196
47
+ 197
48
+ 198
49
+ 199
50
+ 200
@@ -0,0 +1,50 @@
1
+ 201
2
+ 202
3
+ 203
4
+ 204
5
+ 205
6
+ 206
7
+ 207
8
+ 208
9
+ 209
10
+ 210
11
+ 211
12
+ 212
13
+ 213
14
+ 214
15
+ 215
16
+ 216
17
+ 217
18
+ 218
19
+ 219
20
+ 220
21
+ 221
22
+ 222
23
+ 223
24
+ 224
25
+ 225
26
+ 226
27
+ 227
28
+ 228
29
+ 229
30
+ 230
31
+ 231
32
+ 232
33
+ 233
34
+ 234
35
+ 235
36
+ 236
37
+ 237
38
+ 238
39
+ 239
40
+ 240
41
+ 241
42
+ 242
43
+ 243
44
+ 244
45
+ 245
46
+ 246
47
+ 247
48
+ 248
49
+ 249
50
+ 250
@@ -0,0 +1,50 @@
1
+ 251
2
+ 252
3
+ 253
4
+ 254
5
+ 255
6
+ 256
7
+ 257
8
+ 258
9
+ 259
10
+ 260
11
+ 261
12
+ 262
13
+ 263
14
+ 264
15
+ 265
16
+ 266
17
+ 267
18
+ 268
19
+ 269
20
+ 270
21
+ 271
22
+ 272
23
+ 273
24
+ 274
25
+ 275
26
+ 276
27
+ 277
28
+ 278
29
+ 279
30
+ 280
31
+ 281
32
+ 282
33
+ 283
34
+ 284
35
+ 285
36
+ 286
37
+ 287
38
+ 288
39
+ 289
40
+ 290
41
+ 291
42
+ 292
43
+ 293
44
+ 294
45
+ 295
46
+ 296
47
+ 297
48
+ 298
49
+ 299
50
+ 300
@@ -0,0 +1,50 @@
1
+ 301
2
+ 302
3
+ 303
4
+ 304
5
+ 305
6
+ 306
7
+ 307
8
+ 308
9
+ 309
10
+ 310
11
+ 311
12
+ 312
13
+ 313
14
+ 314
15
+ 315
16
+ 316
17
+ 317
18
+ 318
19
+ 319
20
+ 320
21
+ 321
22
+ 322
23
+ 323
24
+ 324
25
+ 325
26
+ 326
27
+ 327
28
+ 328
29
+ 329
30
+ 330
31
+ 331
32
+ 332
33
+ 333
34
+ 334
35
+ 335
36
+ 336
37
+ 337
38
+ 338
39
+ 339
40
+ 340
41
+ 341
42
+ 342
43
+ 343
44
+ 344
45
+ 345
46
+ 346
47
+ 347
48
+ 348
49
+ 349
50
+ 350