ruby-spark 1.1.0.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +37 -0
  3. data/Gemfile +47 -0
  4. data/Guardfile +5 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +252 -0
  7. data/Rakefile +35 -0
  8. data/TODO.md +6 -0
  9. data/benchmark/aggregate.rb +33 -0
  10. data/benchmark/bisect.rb +88 -0
  11. data/benchmark/comparison/prepare.sh +18 -0
  12. data/benchmark/comparison/python.py +156 -0
  13. data/benchmark/comparison/r.r +69 -0
  14. data/benchmark/comparison/ruby.rb +167 -0
  15. data/benchmark/comparison/run-all.sh +160 -0
  16. data/benchmark/comparison/scala.scala +181 -0
  17. data/benchmark/custom_marshal.rb +94 -0
  18. data/benchmark/digest.rb +150 -0
  19. data/benchmark/enumerator.rb +88 -0
  20. data/benchmark/serializer.rb +82 -0
  21. data/benchmark/sort.rb +43 -0
  22. data/benchmark/sort2.rb +164 -0
  23. data/benchmark/take.rb +28 -0
  24. data/bin/ruby-spark +8 -0
  25. data/example/pi.rb +28 -0
  26. data/example/website_search.rb +83 -0
  27. data/ext/ruby_c/extconf.rb +3 -0
  28. data/ext/ruby_c/murmur.c +158 -0
  29. data/ext/ruby_c/murmur.h +9 -0
  30. data/ext/ruby_c/ruby-spark.c +18 -0
  31. data/ext/ruby_java/Digest.java +36 -0
  32. data/ext/ruby_java/Murmur2.java +98 -0
  33. data/ext/ruby_java/RubySparkExtService.java +28 -0
  34. data/ext/ruby_java/extconf.rb +3 -0
  35. data/ext/spark/build.sbt +73 -0
  36. data/ext/spark/project/plugins.sbt +9 -0
  37. data/ext/spark/sbt/sbt +34 -0
  38. data/ext/spark/src/main/scala/Exec.scala +91 -0
  39. data/ext/spark/src/main/scala/MLLibAPI.scala +4 -0
  40. data/ext/spark/src/main/scala/Marshal.scala +52 -0
  41. data/ext/spark/src/main/scala/MarshalDump.scala +113 -0
  42. data/ext/spark/src/main/scala/MarshalLoad.scala +220 -0
  43. data/ext/spark/src/main/scala/RubyAccumulatorParam.scala +69 -0
  44. data/ext/spark/src/main/scala/RubyBroadcast.scala +13 -0
  45. data/ext/spark/src/main/scala/RubyConstant.scala +13 -0
  46. data/ext/spark/src/main/scala/RubyMLLibAPI.scala +55 -0
  47. data/ext/spark/src/main/scala/RubyMLLibUtilAPI.scala +21 -0
  48. data/ext/spark/src/main/scala/RubyPage.scala +34 -0
  49. data/ext/spark/src/main/scala/RubyRDD.scala +392 -0
  50. data/ext/spark/src/main/scala/RubySerializer.scala +14 -0
  51. data/ext/spark/src/main/scala/RubyTab.scala +11 -0
  52. data/ext/spark/src/main/scala/RubyUtils.scala +15 -0
  53. data/ext/spark/src/main/scala/RubyWorker.scala +257 -0
  54. data/ext/spark/src/test/scala/MarshalSpec.scala +84 -0
  55. data/lib/ruby-spark.rb +1 -0
  56. data/lib/spark.rb +198 -0
  57. data/lib/spark/accumulator.rb +260 -0
  58. data/lib/spark/broadcast.rb +98 -0
  59. data/lib/spark/build.rb +43 -0
  60. data/lib/spark/cli.rb +169 -0
  61. data/lib/spark/command.rb +86 -0
  62. data/lib/spark/command/base.rb +158 -0
  63. data/lib/spark/command/basic.rb +345 -0
  64. data/lib/spark/command/pair.rb +124 -0
  65. data/lib/spark/command/sort.rb +51 -0
  66. data/lib/spark/command/statistic.rb +144 -0
  67. data/lib/spark/command_builder.rb +141 -0
  68. data/lib/spark/command_validator.rb +34 -0
  69. data/lib/spark/config.rb +238 -0
  70. data/lib/spark/constant.rb +14 -0
  71. data/lib/spark/context.rb +322 -0
  72. data/lib/spark/error.rb +50 -0
  73. data/lib/spark/ext/hash.rb +41 -0
  74. data/lib/spark/ext/integer.rb +25 -0
  75. data/lib/spark/ext/io.rb +67 -0
  76. data/lib/spark/ext/ip_socket.rb +29 -0
  77. data/lib/spark/ext/module.rb +58 -0
  78. data/lib/spark/ext/object.rb +24 -0
  79. data/lib/spark/ext/string.rb +24 -0
  80. data/lib/spark/helper.rb +10 -0
  81. data/lib/spark/helper/logger.rb +40 -0
  82. data/lib/spark/helper/parser.rb +85 -0
  83. data/lib/spark/helper/serialize.rb +71 -0
  84. data/lib/spark/helper/statistic.rb +93 -0
  85. data/lib/spark/helper/system.rb +42 -0
  86. data/lib/spark/java_bridge.rb +19 -0
  87. data/lib/spark/java_bridge/base.rb +203 -0
  88. data/lib/spark/java_bridge/jruby.rb +23 -0
  89. data/lib/spark/java_bridge/rjb.rb +41 -0
  90. data/lib/spark/logger.rb +76 -0
  91. data/lib/spark/mllib.rb +100 -0
  92. data/lib/spark/mllib/classification/common.rb +31 -0
  93. data/lib/spark/mllib/classification/logistic_regression.rb +223 -0
  94. data/lib/spark/mllib/classification/naive_bayes.rb +97 -0
  95. data/lib/spark/mllib/classification/svm.rb +135 -0
  96. data/lib/spark/mllib/clustering/gaussian_mixture.rb +82 -0
  97. data/lib/spark/mllib/clustering/kmeans.rb +118 -0
  98. data/lib/spark/mllib/matrix.rb +120 -0
  99. data/lib/spark/mllib/regression/common.rb +73 -0
  100. data/lib/spark/mllib/regression/labeled_point.rb +41 -0
  101. data/lib/spark/mllib/regression/lasso.rb +100 -0
  102. data/lib/spark/mllib/regression/linear.rb +124 -0
  103. data/lib/spark/mllib/regression/ridge.rb +97 -0
  104. data/lib/spark/mllib/ruby_matrix/matrix_adapter.rb +53 -0
  105. data/lib/spark/mllib/ruby_matrix/vector_adapter.rb +57 -0
  106. data/lib/spark/mllib/stat/distribution.rb +12 -0
  107. data/lib/spark/mllib/vector.rb +185 -0
  108. data/lib/spark/rdd.rb +1377 -0
  109. data/lib/spark/sampler.rb +92 -0
  110. data/lib/spark/serializer.rb +79 -0
  111. data/lib/spark/serializer/auto_batched.rb +59 -0
  112. data/lib/spark/serializer/base.rb +63 -0
  113. data/lib/spark/serializer/batched.rb +84 -0
  114. data/lib/spark/serializer/cartesian.rb +13 -0
  115. data/lib/spark/serializer/compressed.rb +27 -0
  116. data/lib/spark/serializer/marshal.rb +17 -0
  117. data/lib/spark/serializer/message_pack.rb +23 -0
  118. data/lib/spark/serializer/oj.rb +23 -0
  119. data/lib/spark/serializer/pair.rb +41 -0
  120. data/lib/spark/serializer/text.rb +25 -0
  121. data/lib/spark/sort.rb +189 -0
  122. data/lib/spark/stat_counter.rb +125 -0
  123. data/lib/spark/storage_level.rb +39 -0
  124. data/lib/spark/version.rb +3 -0
  125. data/lib/spark/worker/master.rb +144 -0
  126. data/lib/spark/worker/spark_files.rb +15 -0
  127. data/lib/spark/worker/worker.rb +200 -0
  128. data/ruby-spark.gemspec +47 -0
  129. data/spec/generator.rb +37 -0
  130. data/spec/inputs/lorem_300.txt +316 -0
  131. data/spec/inputs/numbers/1.txt +50 -0
  132. data/spec/inputs/numbers/10.txt +50 -0
  133. data/spec/inputs/numbers/11.txt +50 -0
  134. data/spec/inputs/numbers/12.txt +50 -0
  135. data/spec/inputs/numbers/13.txt +50 -0
  136. data/spec/inputs/numbers/14.txt +50 -0
  137. data/spec/inputs/numbers/15.txt +50 -0
  138. data/spec/inputs/numbers/16.txt +50 -0
  139. data/spec/inputs/numbers/17.txt +50 -0
  140. data/spec/inputs/numbers/18.txt +50 -0
  141. data/spec/inputs/numbers/19.txt +50 -0
  142. data/spec/inputs/numbers/2.txt +50 -0
  143. data/spec/inputs/numbers/20.txt +50 -0
  144. data/spec/inputs/numbers/3.txt +50 -0
  145. data/spec/inputs/numbers/4.txt +50 -0
  146. data/spec/inputs/numbers/5.txt +50 -0
  147. data/spec/inputs/numbers/6.txt +50 -0
  148. data/spec/inputs/numbers/7.txt +50 -0
  149. data/spec/inputs/numbers/8.txt +50 -0
  150. data/spec/inputs/numbers/9.txt +50 -0
  151. data/spec/inputs/numbers_0_100.txt +101 -0
  152. data/spec/inputs/numbers_1_100.txt +100 -0
  153. data/spec/lib/collect_spec.rb +42 -0
  154. data/spec/lib/command_spec.rb +68 -0
  155. data/spec/lib/config_spec.rb +64 -0
  156. data/spec/lib/context_spec.rb +165 -0
  157. data/spec/lib/ext_spec.rb +72 -0
  158. data/spec/lib/external_apps_spec.rb +45 -0
  159. data/spec/lib/filter_spec.rb +80 -0
  160. data/spec/lib/flat_map_spec.rb +100 -0
  161. data/spec/lib/group_spec.rb +109 -0
  162. data/spec/lib/helper_spec.rb +19 -0
  163. data/spec/lib/key_spec.rb +41 -0
  164. data/spec/lib/manipulation_spec.rb +122 -0
  165. data/spec/lib/map_partitions_spec.rb +87 -0
  166. data/spec/lib/map_spec.rb +91 -0
  167. data/spec/lib/mllib/classification_spec.rb +54 -0
  168. data/spec/lib/mllib/clustering_spec.rb +35 -0
  169. data/spec/lib/mllib/matrix_spec.rb +32 -0
  170. data/spec/lib/mllib/regression_spec.rb +116 -0
  171. data/spec/lib/mllib/vector_spec.rb +77 -0
  172. data/spec/lib/reduce_by_key_spec.rb +118 -0
  173. data/spec/lib/reduce_spec.rb +131 -0
  174. data/spec/lib/sample_spec.rb +46 -0
  175. data/spec/lib/serializer_spec.rb +88 -0
  176. data/spec/lib/sort_spec.rb +58 -0
  177. data/spec/lib/statistic_spec.rb +170 -0
  178. data/spec/lib/whole_text_files_spec.rb +33 -0
  179. data/spec/spec_helper.rb +38 -0
  180. metadata +389 -0
@@ -0,0 +1,50 @@
1
+ 951
2
+ 952
3
+ 953
4
+ 954
5
+ 955
6
+ 956
7
+ 957
8
+ 958
9
+ 959
10
+ 960
11
+ 961
12
+ 962
13
+ 963
14
+ 964
15
+ 965
16
+ 966
17
+ 967
18
+ 968
19
+ 969
20
+ 970
21
+ 971
22
+ 972
23
+ 973
24
+ 974
25
+ 975
26
+ 976
27
+ 977
28
+ 978
29
+ 979
30
+ 980
31
+ 981
32
+ 982
33
+ 983
34
+ 984
35
+ 985
36
+ 986
37
+ 987
38
+ 988
39
+ 989
40
+ 990
41
+ 991
42
+ 992
43
+ 993
44
+ 994
45
+ 995
46
+ 996
47
+ 997
48
+ 998
49
+ 999
50
+ 1000
@@ -0,0 +1,50 @@
1
+ 101
2
+ 102
3
+ 103
4
+ 104
5
+ 105
6
+ 106
7
+ 107
8
+ 108
9
+ 109
10
+ 110
11
+ 111
12
+ 112
13
+ 113
14
+ 114
15
+ 115
16
+ 116
17
+ 117
18
+ 118
19
+ 119
20
+ 120
21
+ 121
22
+ 122
23
+ 123
24
+ 124
25
+ 125
26
+ 126
27
+ 127
28
+ 128
29
+ 129
30
+ 130
31
+ 131
32
+ 132
33
+ 133
34
+ 134
35
+ 135
36
+ 136
37
+ 137
38
+ 138
39
+ 139
40
+ 140
41
+ 141
42
+ 142
43
+ 143
44
+ 144
45
+ 145
46
+ 146
47
+ 147
48
+ 148
49
+ 149
50
+ 150
@@ -0,0 +1,50 @@
1
+ 151
2
+ 152
3
+ 153
4
+ 154
5
+ 155
6
+ 156
7
+ 157
8
+ 158
9
+ 159
10
+ 160
11
+ 161
12
+ 162
13
+ 163
14
+ 164
15
+ 165
16
+ 166
17
+ 167
18
+ 168
19
+ 169
20
+ 170
21
+ 171
22
+ 172
23
+ 173
24
+ 174
25
+ 175
26
+ 176
27
+ 177
28
+ 178
29
+ 179
30
+ 180
31
+ 181
32
+ 182
33
+ 183
34
+ 184
35
+ 185
36
+ 186
37
+ 187
38
+ 188
39
+ 189
40
+ 190
41
+ 191
42
+ 192
43
+ 193
44
+ 194
45
+ 195
46
+ 196
47
+ 197
48
+ 198
49
+ 199
50
+ 200
@@ -0,0 +1,50 @@
1
+ 201
2
+ 202
3
+ 203
4
+ 204
5
+ 205
6
+ 206
7
+ 207
8
+ 208
9
+ 209
10
+ 210
11
+ 211
12
+ 212
13
+ 213
14
+ 214
15
+ 215
16
+ 216
17
+ 217
18
+ 218
19
+ 219
20
+ 220
21
+ 221
22
+ 222
23
+ 223
24
+ 224
25
+ 225
26
+ 226
27
+ 227
28
+ 228
29
+ 229
30
+ 230
31
+ 231
32
+ 232
33
+ 233
34
+ 234
35
+ 235
36
+ 236
37
+ 237
38
+ 238
39
+ 239
40
+ 240
41
+ 241
42
+ 242
43
+ 243
44
+ 244
45
+ 245
46
+ 246
47
+ 247
48
+ 248
49
+ 249
50
+ 250
@@ -0,0 +1,50 @@
1
+ 251
2
+ 252
3
+ 253
4
+ 254
5
+ 255
6
+ 256
7
+ 257
8
+ 258
9
+ 259
10
+ 260
11
+ 261
12
+ 262
13
+ 263
14
+ 264
15
+ 265
16
+ 266
17
+ 267
18
+ 268
19
+ 269
20
+ 270
21
+ 271
22
+ 272
23
+ 273
24
+ 274
25
+ 275
26
+ 276
27
+ 277
28
+ 278
29
+ 279
30
+ 280
31
+ 281
32
+ 282
33
+ 283
34
+ 284
35
+ 285
36
+ 286
37
+ 287
38
+ 288
39
+ 289
40
+ 290
41
+ 291
42
+ 292
43
+ 293
44
+ 294
45
+ 295
46
+ 296
47
+ 297
48
+ 298
49
+ 299
50
+ 300
@@ -0,0 +1,50 @@
1
+ 301
2
+ 302
3
+ 303
4
+ 304
5
+ 305
6
+ 306
7
+ 307
8
+ 308
9
+ 309
10
+ 310
11
+ 311
12
+ 312
13
+ 313
14
+ 314
15
+ 315
16
+ 316
17
+ 317
18
+ 318
19
+ 319
20
+ 320
21
+ 321
22
+ 322
23
+ 323
24
+ 324
25
+ 325
26
+ 326
27
+ 327
28
+ 328
29
+ 329
30
+ 330
31
+ 331
32
+ 332
33
+ 333
34
+ 334
35
+ 335
36
+ 336
37
+ 337
38
+ 338
39
+ 339
40
+ 340
41
+ 341
42
+ 342
43
+ 343
44
+ 344
45
+ 345
46
+ 346
47
+ 347
48
+ 348
49
+ 349
50
+ 350
@@ -0,0 +1,50 @@
1
+ 351
2
+ 352
3
+ 353
4
+ 354
5
+ 355
6
+ 356
7
+ 357
8
+ 358
9
+ 359
10
+ 360
11
+ 361
12
+ 362
13
+ 363
14
+ 364
15
+ 365
16
+ 366
17
+ 367
18
+ 368
19
+ 369
20
+ 370
21
+ 371
22
+ 372
23
+ 373
24
+ 374
25
+ 375
26
+ 376
27
+ 377
28
+ 378
29
+ 379
30
+ 380
31
+ 381
32
+ 382
33
+ 383
34
+ 384
35
+ 385
36
+ 386
37
+ 387
38
+ 388
39
+ 389
40
+ 390
41
+ 391
42
+ 392
43
+ 393
44
+ 394
45
+ 395
46
+ 396
47
+ 397
48
+ 398
49
+ 399
50
+ 400
@@ -0,0 +1,50 @@
1
+ 401
2
+ 402
3
+ 403
4
+ 404
5
+ 405
6
+ 406
7
+ 407
8
+ 408
9
+ 409
10
+ 410
11
+ 411
12
+ 412
13
+ 413
14
+ 414
15
+ 415
16
+ 416
17
+ 417
18
+ 418
19
+ 419
20
+ 420
21
+ 421
22
+ 422
23
+ 423
24
+ 424
25
+ 425
26
+ 426
27
+ 427
28
+ 428
29
+ 429
30
+ 430
31
+ 431
32
+ 432
33
+ 433
34
+ 434
35
+ 435
36
+ 436
37
+ 437
38
+ 438
39
+ 439
40
+ 440
41
+ 441
42
+ 442
43
+ 443
44
+ 444
45
+ 445
46
+ 446
47
+ 447
48
+ 448
49
+ 449
50
+ 450
@@ -0,0 +1,101 @@
1
+ 0
2
+ 1
3
+ 2
4
+ 3
5
+ 4
6
+ 5
7
+ 6
8
+ 7
9
+ 8
10
+ 9
11
+ 10
12
+ 11
13
+ 12
14
+ 13
15
+ 14
16
+ 15
17
+ 16
18
+ 17
19
+ 18
20
+ 19
21
+ 20
22
+ 21
23
+ 22
24
+ 23
25
+ 24
26
+ 25
27
+ 26
28
+ 27
29
+ 28
30
+ 29
31
+ 30
32
+ 31
33
+ 32
34
+ 33
35
+ 34
36
+ 35
37
+ 36
38
+ 37
39
+ 38
40
+ 39
41
+ 40
42
+ 41
43
+ 42
44
+ 43
45
+ 44
46
+ 45
47
+ 46
48
+ 47
49
+ 48
50
+ 49
51
+ 50
52
+ 51
53
+ 52
54
+ 53
55
+ 54
56
+ 55
57
+ 56
58
+ 57
59
+ 58
60
+ 59
61
+ 60
62
+ 61
63
+ 62
64
+ 63
65
+ 64
66
+ 65
67
+ 66
68
+ 67
69
+ 68
70
+ 69
71
+ 70
72
+ 71
73
+ 72
74
+ 73
75
+ 74
76
+ 75
77
+ 76
78
+ 77
79
+ 78
80
+ 79
81
+ 80
82
+ 81
83
+ 82
84
+ 83
85
+ 84
86
+ 85
87
+ 86
88
+ 87
89
+ 88
90
+ 89
91
+ 90
92
+ 91
93
+ 92
94
+ 93
95
+ 94
96
+ 95
97
+ 96
98
+ 97
99
+ 98
100
+ 99
101
+ 100