skynet 0.9.2 → 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (128) hide show
  1. data/History.txt +49 -0
  2. data/Manifest.txt +84 -6
  3. data/README.txt +75 -64
  4. data/app_generators/skynet_install/skynet_install_generator.rb +14 -8
  5. data/app_generators/skynet_install/templates/migration.rb +1 -24
  6. data/app_generators/skynet_install/templates/skynet_config.rb +50 -0
  7. data/app_generators/skynet_install/templates/skynet_initializer.rb +1 -0
  8. data/app_generators/skynet_install/templates/{skynet_schema.sql → skynet_mysql_schema.sql} +1 -24
  9. data/bin/skynet +37 -10
  10. data/bin/skynet_install +5 -5
  11. data/bin/skynet_tuplespace_server +27 -19
  12. data/examples/dgrep/README +70 -0
  13. data/examples/dgrep/config/skynet_config.rb +26 -0
  14. data/examples/dgrep/data/shakespeare/README +2 -0
  15. data/examples/dgrep/data/shakespeare/poetry/loverscomplaint +381 -0
  16. data/examples/dgrep/data/shakespeare/poetry/rapeoflucrece +2199 -0
  17. data/examples/dgrep/data/shakespeare/poetry/sonnets +2633 -0
  18. data/examples/dgrep/data/shakespeare/poetry/various +640 -0
  19. data/examples/dgrep/data/shakespeare/poetry/venusandadonis +1423 -0
  20. data/examples/dgrep/data/testfile1.txt +1 -0
  21. data/examples/dgrep/data/testfile2.txt +1 -0
  22. data/examples/dgrep/data/testfile3.txt +1 -0
  23. data/examples/dgrep/data/testfile4.txt +1 -0
  24. data/examples/dgrep/lib/dgrep.rb +59 -0
  25. data/examples/dgrep/lib/mapreduce_test.rb +32 -0
  26. data/examples/dgrep/lib/most_common_words.rb +45 -0
  27. data/examples/dgrep/script/dgrep +75 -0
  28. data/examples/rails_mysql_example/README +66 -0
  29. data/examples/rails_mysql_example/Rakefile +10 -0
  30. data/examples/rails_mysql_example/app/controllers/application.rb +10 -0
  31. data/examples/rails_mysql_example/app/helpers/application_helper.rb +3 -0
  32. data/examples/rails_mysql_example/app/models/user.rb +21 -0
  33. data/examples/rails_mysql_example/app/models/user_favorite.rb +5 -0
  34. data/examples/rails_mysql_example/app/models/user_mailer.rb +12 -0
  35. data/examples/rails_mysql_example/app/views/user_mailer/welcome.erb +5 -0
  36. data/examples/rails_mysql_example/config/boot.rb +109 -0
  37. data/examples/rails_mysql_example/config/database.yml +42 -0
  38. data/examples/rails_mysql_example/config/environment.rb +59 -0
  39. data/examples/rails_mysql_example/config/environments/development.rb +18 -0
  40. data/examples/rails_mysql_example/config/environments/production.rb +19 -0
  41. data/examples/rails_mysql_example/config/environments/test.rb +22 -0
  42. data/examples/rails_mysql_example/config/initializers/inflections.rb +10 -0
  43. data/examples/rails_mysql_example/config/initializers/mime_types.rb +5 -0
  44. data/examples/rails_mysql_example/config/initializers/skynet.rb +1 -0
  45. data/examples/rails_mysql_example/config/routes.rb +35 -0
  46. data/examples/rails_mysql_example/config/skynet_config.rb +36 -0
  47. data/examples/rails_mysql_example/db/migrate/001_create_skynet_tables.rb +43 -0
  48. data/examples/rails_mysql_example/db/migrate/002_create_users.rb +16 -0
  49. data/examples/rails_mysql_example/db/migrate/003_create_user_favorites.rb +14 -0
  50. data/examples/rails_mysql_example/db/schema.rb +85 -0
  51. data/examples/rails_mysql_example/db/skynet_mysql_schema.sql +33 -0
  52. data/examples/rails_mysql_example/doc/README_FOR_APP +2 -0
  53. data/examples/rails_mysql_example/lib/tasks/rails_mysql_example.rake +20 -0
  54. data/examples/rails_mysql_example/public/.htaccess +40 -0
  55. data/examples/rails_mysql_example/public/404.html +30 -0
  56. data/examples/rails_mysql_example/public/422.html +30 -0
  57. data/examples/rails_mysql_example/public/500.html +30 -0
  58. data/examples/rails_mysql_example/public/dispatch.cgi +10 -0
  59. data/examples/rails_mysql_example/public/dispatch.fcgi +24 -0
  60. data/examples/rails_mysql_example/public/dispatch.rb +10 -0
  61. data/{log/debug.log → examples/rails_mysql_example/public/favicon.ico} +0 -0
  62. data/examples/rails_mysql_example/public/images/rails.png +0 -0
  63. data/examples/rails_mysql_example/public/index.html +277 -0
  64. data/examples/rails_mysql_example/public/javascripts/application.js +2 -0
  65. data/examples/rails_mysql_example/public/javascripts/controls.js +963 -0
  66. data/examples/rails_mysql_example/public/javascripts/dragdrop.js +972 -0
  67. data/examples/rails_mysql_example/public/javascripts/effects.js +1120 -0
  68. data/examples/rails_mysql_example/public/javascripts/prototype.js +4225 -0
  69. data/examples/rails_mysql_example/public/robots.txt +5 -0
  70. data/examples/rails_mysql_example/script/about +3 -0
  71. data/examples/rails_mysql_example/script/console +3 -0
  72. data/examples/rails_mysql_example/script/destroy +3 -0
  73. data/examples/rails_mysql_example/script/generate +3 -0
  74. data/examples/rails_mysql_example/script/performance/benchmarker +3 -0
  75. data/examples/rails_mysql_example/script/performance/profiler +3 -0
  76. data/examples/rails_mysql_example/script/performance/request +3 -0
  77. data/examples/rails_mysql_example/script/plugin +3 -0
  78. data/examples/rails_mysql_example/script/process/inspector +3 -0
  79. data/examples/rails_mysql_example/script/process/reaper +3 -0
  80. data/examples/rails_mysql_example/script/process/spawner +3 -0
  81. data/examples/rails_mysql_example/script/runner +3 -0
  82. data/examples/rails_mysql_example/script/server +3 -0
  83. data/examples/rails_mysql_example/test/fixtures/user_favorites.yml +9 -0
  84. data/examples/rails_mysql_example/test/fixtures/users.yml +11 -0
  85. data/examples/rails_mysql_example/test/test_helper.rb +38 -0
  86. data/examples/rails_mysql_example/test/unit/user_favorite_test.rb +8 -0
  87. data/examples/rails_mysql_example/test/unit/user_test.rb +8 -0
  88. data/extras/README +7 -0
  89. data/extras/init.d/skynet +87 -0
  90. data/extras/nagios/check_skynet.sh +121 -0
  91. data/extras/rails/controllers/skynet_controller.rb +43 -0
  92. data/extras/rails/views/skynet/index.rhtml +137 -0
  93. data/lib/skynet.rb +59 -1
  94. data/lib/skynet/mapreduce_helper.rb +2 -2
  95. data/lib/skynet/mapreduce_test.rb +32 -1
  96. data/lib/skynet/message_queue_adapters/mysql.rb +422 -539
  97. data/lib/skynet/message_queue_adapters/tuple_space.rb +45 -71
  98. data/lib/skynet/skynet_active_record_extensions.rb +22 -11
  99. data/lib/skynet/skynet_config.rb +54 -20
  100. data/lib/skynet/skynet_console.rb +4 -1
  101. data/lib/skynet/skynet_console_helper.rb +5 -1
  102. data/lib/skynet/skynet_debugger.rb +58 -4
  103. data/lib/skynet/skynet_job.rb +61 -24
  104. data/lib/skynet/skynet_launcher.rb +29 -3
  105. data/lib/skynet/skynet_logger.rb +11 -1
  106. data/lib/skynet/skynet_manager.rb +403 -240
  107. data/lib/skynet/skynet_message.rb +1 -3
  108. data/lib/skynet/skynet_message_queue.rb +42 -19
  109. data/lib/skynet/skynet_partitioners.rb +19 -15
  110. data/lib/skynet/skynet_ruby_extensions.rb +18 -0
  111. data/lib/skynet/skynet_tuplespace_server.rb +17 -14
  112. data/lib/skynet/skynet_worker.rb +132 -98
  113. data/lib/skynet/version.rb +1 -1
  114. data/script/destroy +0 -0
  115. data/script/generate +0 -0
  116. data/script/txt2html +0 -0
  117. data/test/test_helper.rb +2 -0
  118. data/test/test_skynet.rb +13 -5
  119. data/test/test_skynet_manager.rb +24 -9
  120. data/test/test_skynet_task.rb +1 -1
  121. data/website/index.html +77 -29
  122. data/website/index.txt +53 -24
  123. data/website/stylesheets/screen.css +12 -12
  124. metadata +156 -66
  125. data/app_generators/skynet_install/templates/skynet +0 -46
  126. data/log/skynet.log +0 -29
  127. data/log/skynet_tuplespace_server.log +0 -7
  128. data/log/skynet_worker.pid +0 -1
@@ -1,3 +1,52 @@
1
+ == 0.9.3 2008-05-22
2
+ Skynet::Manager and Skynet Script Runner
3
+ - Rewrote how Skynet workers and the Skynet manager talk to each other on each machine. See below for more info.
4
+
5
+ - Added an examples/ directory with sample skynet apps.
6
+
7
+ - Support starting Skynet with 'skynet start/stop' to daemonize
8
+ - skynet_install now only installs a config/skynet_config.rb in your application's directory. You are no longer supposed
9
+ to have a script/skynet to start skynet. Instead, as long as you have a config/skynet_config.rb you can just run
10
+ 'skynet start/stop' from within your application_directory/
11
+ If you're installing in a rails application, it will install a default config/initializers/skynet.rb which merely requires config/skynet_config.rb
12
+ - The config file can also be specified with --config= skynet start
13
+
14
+ - Gracefully handles trying to start skynet more than once
15
+ - Close file handles on exec.
16
+ Skynet::Worker and Skynet::Manager now call Skynet.fork_and_exec instead of their own versions.
17
+ Skynet.fork_and_exec prevents file descriptor exhaustion by calling Skynet.close_file_handles.
18
+ Skynet::Manager detaches from the console by calling Skynet.close_console
19
+
20
+ - Dramatically improved Skynet shutdown time
21
+ - Huge performance improvements in taking/putting tasks and results. Much lower resource utilization, especially on mysql.
22
+ - You now have to specify your PIDFILE and LOGFILE dirs and files differently. This will break all old skynet runners. Sorry.
23
+ - Skynet runner handles default config variables better
24
+ - Fix serious bug in Skynet::Partitioners::RecombineAndSplit where it wouldn't handle empty results well.
25
+ - Fixed Skynet::Partitioners::ArrayDataSplitByFirstEntry to handle strings as keys better
26
+ - Added Skynet::Job.results_by_job_id to retrieve results from asynchronous jobs
27
+ - Added printlog logging method which always prints to the log as [LOG]
28
+ - Deprecated Skynet.new to Skynet.start
29
+ - Mysql Message Queue Adapter - Make delete_expired_messages much safer.
30
+ - rename ActiveRecord::Base.distributed_find.each to ActiveRecord::Base.distributed_find.map
31
+ - ActiveRecord::Base.distributed_find - Patch submitted by Lourens Naude (lourens@methodmissing.com) which checks the model for the primary_key name as opposed to assuming it is
32
+ 'id'
33
+ - We don't want to use rails constantize so I've temporarily borrowed the method from ActiveSupport inflector and added it to skynet_ruby_extensions.
34
+ - Fix bug in Job comment where it referenced MapreduceTest instead of Skynet::MapreduceTest
35
+ - Fix tests. For some reason you still can't run all the tests at once with rake test, but if the files are run individually they all pass.
36
+ - Change mysql text fields to longtext in migration and schema files
37
+ - Include some extras including our init.d script, our nagios monitoring script, rails controller and view for monitoring
38
+ - Created a new skynet rails initializer which gets installed from skynet_install --rails
39
+ - Modified the skynet_install skynet runner to take skynet initializer into account
40
+ - Skynet::ActiveRecordExtensions Fixed a bug where it would fail if your table had fewer than 1000 rows. It performs a count
41
+ first now to make sure there are enough rows.
42
+ - Introduced some new Skynet::Config methods for getting logfile and pidfile locations
43
+
44
+ Skynet Manager/Worker Refactor
45
+ The workers used to publish their worker statuses to the skynet_worker_queue which lived in the same Q space as the skynet_message_queue. skynet managers would then query that queue for their workers' statuses. This was a very inefficient use of central resources. NOW, workers communicate with their manager via DRb, calling manager.worker_notify(status). This adds the worker status hash onto a local Queue object stored in the manager. A separate thread watches that queue and updates the internal manager information about its workers. This was in place of polling a queue every N seconds for new worker records.
46
+ Managers now save their worker information to a file periodically so they can be reloaded on a restart. This means managers can keep track of how many tasks all of their workers have done even after restarts.
47
+ As a consequence of decentralizing worker stats, you now have to ask all your managers for their individual stats along with the main message queue stats. Added a stats_for_hosts method to Skynet::Manager which aggregates stats across many managers.
48
+
49
+
1
50
  == 0.9.2 2008-01-22
2
51
  Highlights:
3
52
  - Multiple Message Queues
@@ -6,13 +6,95 @@ Rakefile
6
6
  app_generators/skynet_install/USAGE
7
7
  app_generators/skynet_install/skynet_install_generator.rb
8
8
  app_generators/skynet_install/templates/migration.rb
9
- app_generators/skynet_install/templates/skynet
10
- app_generators/skynet_install/templates/skynet_schema.sql
9
+ app_generators/skynet_install/templates/skynet_config.rb
10
+ app_generators/skynet_install/templates/skynet_initializer.rb
11
+ app_generators/skynet_install/templates/skynet_mysql_schema.sql
11
12
  bin/skynet
12
13
  bin/skynet_install
13
14
  bin/skynet_tuplespace_server
14
15
  config/hoe.rb
15
16
  config/requirements.rb
17
+ examples/dgrep/README
18
+ examples/dgrep/config/skynet_config.rb
19
+ examples/dgrep/data/shakespeare/README
20
+ examples/dgrep/data/shakespeare/poetry/loverscomplaint
21
+ examples/dgrep/data/shakespeare/poetry/rapeoflucrece
22
+ examples/dgrep/data/shakespeare/poetry/sonnets
23
+ examples/dgrep/data/shakespeare/poetry/various
24
+ examples/dgrep/data/shakespeare/poetry/venusandadonis
25
+ examples/dgrep/data/testfile1.txt
26
+ examples/dgrep/data/testfile2.txt
27
+ examples/dgrep/data/testfile3.txt
28
+ examples/dgrep/data/testfile4.txt
29
+ examples/dgrep/lib/dgrep.rb
30
+ examples/dgrep/lib/mapreduce_test.rb
31
+ examples/dgrep/lib/most_common_words.rb
32
+ examples/dgrep/script/dgrep
33
+ examples/rails_mysql_example/README
34
+ examples/rails_mysql_example/Rakefile
35
+ examples/rails_mysql_example/app/controllers/application.rb
36
+ examples/rails_mysql_example/app/helpers/application_helper.rb
37
+ examples/rails_mysql_example/app/models/user.rb
38
+ examples/rails_mysql_example/app/models/user_favorite.rb
39
+ examples/rails_mysql_example/app/models/user_mailer.rb
40
+ examples/rails_mysql_example/app/views/user_mailer/welcome.erb
41
+ examples/rails_mysql_example/config/boot.rb
42
+ examples/rails_mysql_example/config/database.yml
43
+ examples/rails_mysql_example/config/environment.rb
44
+ examples/rails_mysql_example/config/environments/development.rb
45
+ examples/rails_mysql_example/config/environments/production.rb
46
+ examples/rails_mysql_example/config/environments/test.rb
47
+ examples/rails_mysql_example/config/initializers/inflections.rb
48
+ examples/rails_mysql_example/config/initializers/mime_types.rb
49
+ examples/rails_mysql_example/config/initializers/skynet.rb
50
+ examples/rails_mysql_example/config/routes.rb
51
+ examples/rails_mysql_example/config/skynet_config.rb
52
+ examples/rails_mysql_example/db/migrate/001_create_skynet_tables.rb
53
+ examples/rails_mysql_example/db/migrate/002_create_users.rb
54
+ examples/rails_mysql_example/db/migrate/003_create_user_favorites.rb
55
+ examples/rails_mysql_example/db/schema.rb
56
+ examples/rails_mysql_example/db/skynet_mysql_schema.sql
57
+ examples/rails_mysql_example/doc/README_FOR_APP
58
+ examples/rails_mysql_example/lib/tasks/rails_mysql_example.rake
59
+ examples/rails_mysql_example/public/.htaccess
60
+ examples/rails_mysql_example/public/404.html
61
+ examples/rails_mysql_example/public/422.html
62
+ examples/rails_mysql_example/public/500.html
63
+ examples/rails_mysql_example/public/dispatch.cgi
64
+ examples/rails_mysql_example/public/dispatch.fcgi
65
+ examples/rails_mysql_example/public/dispatch.rb
66
+ examples/rails_mysql_example/public/favicon.ico
67
+ examples/rails_mysql_example/public/images/rails.png
68
+ examples/rails_mysql_example/public/index.html
69
+ examples/rails_mysql_example/public/javascripts/application.js
70
+ examples/rails_mysql_example/public/javascripts/controls.js
71
+ examples/rails_mysql_example/public/javascripts/dragdrop.js
72
+ examples/rails_mysql_example/public/javascripts/effects.js
73
+ examples/rails_mysql_example/public/javascripts/prototype.js
74
+ examples/rails_mysql_example/public/robots.txt
75
+ examples/rails_mysql_example/script/about
76
+ examples/rails_mysql_example/script/console
77
+ examples/rails_mysql_example/script/destroy
78
+ examples/rails_mysql_example/script/generate
79
+ examples/rails_mysql_example/script/performance/benchmarker
80
+ examples/rails_mysql_example/script/performance/profiler
81
+ examples/rails_mysql_example/script/performance/request
82
+ examples/rails_mysql_example/script/plugin
83
+ examples/rails_mysql_example/script/process/inspector
84
+ examples/rails_mysql_example/script/process/reaper
85
+ examples/rails_mysql_example/script/process/spawner
86
+ examples/rails_mysql_example/script/runner
87
+ examples/rails_mysql_example/script/server
88
+ examples/rails_mysql_example/test/fixtures/user_favorites.yml
89
+ examples/rails_mysql_example/test/fixtures/users.yml
90
+ examples/rails_mysql_example/test/test_helper.rb
91
+ examples/rails_mysql_example/test/unit/user_favorite_test.rb
92
+ examples/rails_mysql_example/test/unit/user_test.rb
93
+ extras/README
94
+ extras/init.d/skynet
95
+ extras/nagios/check_skynet.sh
96
+ extras/rails/controllers/skynet_controller.rb
97
+ extras/rails/views/skynet/index.rhtml
16
98
  lib/skynet.rb
17
99
  lib/skynet/mapreduce_helper.rb
18
100
  lib/skynet/mapreduce_test.rb
@@ -37,10 +119,6 @@ lib/skynet/skynet_task.rb
37
119
  lib/skynet/skynet_tuplespace_server.rb
38
120
  lib/skynet/skynet_worker.rb
39
121
  lib/skynet/version.rb
40
- log/debug.log
41
- log/skynet.log
42
- log/skynet_tuplespace_server.log
43
- log/skynet_worker.pid
44
122
  script/destroy
45
123
  script/generate
46
124
  script/txt2html
data/README.txt CHANGED
@@ -2,63 +2,61 @@ Skynet
2
2
  http://skynet.rubyforge.org/
3
3
  by Adam Pisoni and Geni.com
4
4
 
5
- == DESCRIPTION:
5
+ == DESCRIPTION
6
6
 
7
- Skynet is an open source Ruby implementation of Google's Map/Reduce framework, created at Geni. With Skynet, one can easily convert a time-consuming serial task, such as a computationally expensive Rails migration, into a distributed program running on many computers.
7
+ Skynet is an open source Ruby implementation of Google's MapReduce framework, created at Geni. With Skynet, one can easily convert a time-consuming serial task, such as a computationally expensive Rails migration, into a distributed program running on many computers. If you'd like to learn more about MapReduce, see my intro at the bottom of this document.
8
8
 
9
9
  Skynet is an adaptive, self-upgrading, fault-tolerant, and fully distributed system with no single point of failure. It uses a "peer recovery" system where workers watch out for each other. If a worker dies or fails for any reason, another worker will notice and pick up that task. Skynet also has no special 'master' servers, only workers which can act as a master for any task at any time. Even these master tasks can fail and will be picked up by other workers.
10
10
 
11
- == DOCUMENTATION
11
+ For more detailed documentation see the following:
12
12
 
13
- Feel free to read on if you want more of an overview of Skynet with some specific examples. More specific documentation can be found here:
13
+ Skynet::Job - The main interface to Skynet; includes an example of how to use Skynet
14
14
 
15
- Skynet::Job - The main interface to Skynet
16
-
17
- Skynet::Config - Configuration Options
15
+ Skynet::Config - Configuration options
18
16
 
19
17
  bin/skynet[link:files/bin/skynet.html] - Starting Skynet
20
18
 
21
19
  bin/skynet_install[link:files/bin/skynet_install.html] - Installing Skynet into a local project
22
20
 
23
- == Map/Reduce
21
+ There are also some examples in the examples/ directory included with Skynet.
24
22
 
25
- First of all, Skynet is merely a distributed computing system that allows you to follow the map/reduce steps. You don't have to use it as a map/reduce framework. You can use it as a simple distributed system, or even a simple asynchronous processing system.
23
+ == INSTALLATION
26
24
 
27
- If you already know what Map/Reduce is, skip this section.
25
+ Skynet can be installed via RubyGems:
28
26
 
29
- If you want to know where all this Map/Reduce hype started, you should read Google's paper on it. http://labs.google.com/papers/mapreduce.html
27
+ $ sudo gem install skynet
30
28
 
31
- When I first read that Google paper some years ago, I was a little confused about what all the hypes was. At the most basic level, it seemed too simple to be revolutionary. So you've got a job with 3 steps, you put some data in, it gets split out to a map step run no many machines, the returned data gets reshuffled and parceled out to a reduce step run on many machines. All the results are then put together again. You can see it as 5 steps actually. Data -> Partition -> Map -> Partition -> Reduce. Simple enough. Almost too simple. It was only years later when I began working on Skynet that I realize what the revolutionary part of Google's framwork was. It made distributed computing accessible. Any engineer could write a complex distributed system without needing to know about the complexities of such systems. Also, since the distributed system was generalized, you would only need one class of machines to run ALL of your distributed processing, instead of specialized machines for specialized functions. THAT was revolutionary.
29
+ or grab the bleeding edge skynet in svn at
30
+ $ svn checkout svn+ssh://developername@rubyforge.org/var/svn/skynet
31
+ $ cd skynet; rake install_gem
32
32
 
33
- There are a number of key differences between Google's MR system and skynet. Firstly, currently you can not actually send raw code to the workers. You are really only telling it where the code is. At first this bothered me a lot. Then I realized that in most OO systems, the amount of code you'd need duplicate and to send over the wire to every worker could be ridiculous. For example, if you want to distribute a task you need to run in Rails, you'd have to send almost all of your app and rails to every worker with every chunk of data. So, even if you COULD send code, you'd probably only be sending code that just called some other code in your system. If you can't send ALL the code it needs, then you might as well just tell it where the code is.
33
+ == INITIAL SETUP
34
34
 
35
- The second big difference is that Google's MR framework uses Master federater processes to dole out tasks, recombine them, and generally watch the system. Skynet has not such masters. Instead Skynet uses a standard message queue for all communication. That same message queue allows workers to watch each other in the same way a master would, but without the single point of failure (except the queue itself).
35
+ Skynet works by putting "tasks" on a message queue which are picked up by skynet workers. The workers execute tasks and put their results back on the message queue. Skynet workers need to load your code at startup in order to be able to execute your tasks. This loading is handled by a skynet config file, which you install into your app by running skynet_install[link:files/bin/skynet_install.html].
36
36
 
37
- At its simplest level, a single map reduce job defines a data set, a map method and a reduce method. It may also define a partition method. The map/reduce server evenly splits up (partitions) the data given to it and sends those chunks of data, along with a copy of the code in the map method, to workers that execute the map method against the data it was given. The output from each worker is sent back to the map/reduce server. At this point the Mapreduce server evenly partitions the RESULT data returned from the workers and sends those chunks of data along with the reduce code to the workers to be executed. The reducers return the final result which is returned to whomever requested the job be done in the first place. Not all job need a reduce step, some may just have a map step.
37
+ $ skynet_install [--rails] [--mysql] APP_ROOT_DIR
38
38
 
39
- The most common example of a mapreduce job is a distributed word counter. Say you wanted to determine how many times a single word appears in a 1GB text file. The map/reduce server would break up the 1GB file into reasonable chunks, say 100 lines per chunk (or partition) and then send each 100 line partition along with the code that looks for that word, to workers. Each worker would grab its partition of the data, count how many times the word appears in the data and return that number. It might take dozens of workers to complete the task. When the map step is done, you are left with a huge list of counts returned by the workers. In this example, the reduce step would involve sending that list of counts to another worker, with the code required to sum those counts and finally return the total. In this way a task that used to be done in a linear fashion can be parallelized easily.
39
+ This creates a file called skynet_config.rb in APP_ROOT_DIR/config to which you can add the relevant requires. For example, you might have a rails app and want some of that code to run asynchronously or in a distributed way. Just run 'skynet_install --rails' in your rails root, and it will automatically create config/skynet_config.rb and require environment.rb.
40
40
 
41
- == INSTALLATION:
41
+ Skynet currently supports 2 message queue systems, TupleSpace and Mysql. By default, the TupleSpace queue is used as it is the easiest to set up, though it is less powerful and less scalable for large installations. If you pass --mysql to skynet_install, it will assume you are using mysql as your message queue.
42
42
 
43
- Skynet can be installed via RubyGems:
43
+ == STARTING SKYNET
44
44
 
45
- $ sudo gem install skynet
45
+ Once it is installed in your application, you can run skynet from your application's root directory with:
46
46
 
47
- == GETTING STARTED
47
+ $ skynet start [--workers=N]
48
48
 
49
- Skynet works by putting "tasks" on a message queue which are picked up by skynet workers, who execute the tasks, then put their results back on the message queue. Skynet works best when it runs with your code. For example, you might have a rails app and want some code you've already written to run asynchronously or in a distributed way. Skynet can run within your code by installing a skynet launcher into your app. Running this skynet launcher within your app guarantees all skynet workers will have access to your code. This will be covered later.
49
+ This starts a skynet tuple space message queue and 4 workers. You can control how many workers to start per machine
50
+ by passing --workers=N.
50
51
 
51
- Skynet currently supports 2 message queue systems, TupleSpace and Mysql. By default, the TupleSpace queue is used as it is the easiest to set up, though it is less powerful and less scaleable for large installations.
52
+ == SKYNET CONSOLE
52
53
 
53
- == RUNING SKYNET FOR THE FIRST TIME
54
- Since Skynet is a distributed system, it requires you have a skynet message queue as well as any number of skynet workers running. To start a skynet message queue and a small number of workers:
54
+ You can now run the skynet console to play with skynet a little. See Skynet::ConsoleHelper for commands.
55
55
 
56
- $ skynet
56
+ $ skynet console
57
57
 
58
- This starts a skynet tuple space message queue and 4 workers. You can now run the skynet console to play with skynet a little. See Skynet::ConsoleHelper for commands.
58
+ Remember, when you change your code, you must stop/start skynet.
59
59
 
60
- $ skynet console
61
-
62
60
  For help try:
63
61
  $ skynet --help
64
62
  or
@@ -69,67 +67,80 @@ Here are some commands you can run in the skynet console.
69
67
  > manager.worker_pids
70
68
  > [1,2,3,1,1,4].mapreduce(Skynet::MapreduceTest)
71
69
 
72
- That last command actually took whatever array you gave it and counted the number of times each element appeared in the array. It's not a very useful task, but it shows how easy it is to use.
73
-
74
- == RUNING SKYNET IN YOUR APPLICATION
75
-
76
- To be really useful, you'll want to run skynet in your own application. To do that run:
70
+ That last command actually took whatever array you gave it and counted the number of times each element appeared in the array. It's not a very useful task, but it shows how easy Skynet is to use.
77
71
 
78
- $ skynet_install [--rails] YOUR_APP_DIRECTORY
72
+ To see what Skynet is doing, you may want to tail the skynet logs being written to your log directory.
79
73
 
80
- If you pass --rails it will assume it is installing in a rails app. Once it is installed in your application, you can run skynet with
74
+ For more information on creating your own Skynet jobs read the Skynet::Job documentation.
81
75
 
82
- $ ./script/skynet
83
- $ ./script/skynet console
76
+ == USAGE
84
77
 
85
- == USAGE:
86
-
87
- Skynet was designed to make doing easy things easy and hard things possible. The easiest way to use skynet is to create a new class with a self.map class method. You can optionally include self.reduce, self.reduce_partitioner, self.map_partitioner as well. Each of those methods should expect a single array (regardless of what data you pass). Then, simple create an array and call mapreduce on it passing your class name. Skynet will figure out which methods your class supports and use them accordingly.
78
+ Skynet was designed to make doing easy things easy and hard things possible. The easiest way to use skynet is to create a new class with a self.map class method. You can optionally include self.reduce and self.reduce_partitioner as well. Each of those methods should expect a single array (regardless of what data you pass). Then, simply create an array and call mapreduce on it passing your class name. Skynet will figure out which methods your class supports and use them accordingly.
88
79
 
89
80
  == USING SKYNET IN RAILS
90
81
 
91
- Skynet includes an addition to ActiveRecord that is very powerful.
82
+ Skynet includes an extension to ActiveRecord that is very powerful.
92
83
 
93
84
  === distributed_find
94
- $ YourModel.distributed_find(:all).each(YourClass)
95
- or
85
+
96
86
  $ YourModel.distributed_find(:all).each(:somemethod)
97
87
 
98
- In the first example, a find is 'virtually' run with your model class, and the results are distributed to the skynet workers. If you've implemented a self.map method in YourClass, the retrieved objects will be passed (as arrays) on all the workers.
99
-
100
- In the second example, once the objects of YourModel are distributed, each worker merely calls :somemethod against each object.
88
+ A find is 'virtually' run with your model class, and the results are distributed to the skynet workers. Each worker then calls :somemethod against each object.
101
89
 
102
90
  === send_later
103
91
 
104
- $ model_object.send_later(:method,options,:save)
92
+ $ model_object.send_later(:method, options, :save)
105
93
 
106
- Sometimes you have a method you want to call on a model asynchronously. Using :send_later you can call a method, pass it options, and decide whether you want Skynet to save that model or not once its done calling your method.
94
+ Sometimes you have a method you want to call on a model asynchronously. Using :send_later you can call a method, pass it options, and decide whether you want Skynet to save that model or not once it's done calling your method.
107
95
 
108
96
  == Creating Skynet Jobs
109
97
 
110
- The main interface to Skynet is through Skynet::AsyncJob or Skynet::Job
98
+ The main interface to Skynet is through Skynet::Job
111
99
 
112
- job = Skynet::AsyncJob.new(options)
113
- job.run_master
100
+ job = Skynet::Job.new(options)
101
+ job.run
114
102
 
115
- There are many options you can pass or change once you have a job object. See Skynet::Job for more info.
103
+ There are many options you can pass or change once you have a job object. See Skynet::Job for more info.
116
104
 
117
- Most of the time, you will only need to pass a map_reduce_class and map_data. All other options just give you finer grain control. map_data must be an array. The map_reduce_class must AT LEAST implement a self.map class method. It may also implement self.reduce, self.reduce_partitioner, and self.map_partitioner. Skynet will assume it can use all of those methods in the map_reduce_class you pass.
105
+ Most of the time, you will only need to pass a :map_reduce_class and :map_data. All other options just give you finer grained control. The :map_data must be an array, and the :map_reduce_class must implement at least a self.map class method. It may optionally implement self.reduce and self.reduce_partitioner. Your map and reduce class methods should ALWAYS assume they are being passed an array. Your map method must always return an array as well.
106
+
107
+ == Skynet Logging
108
+
109
+ You might be interested in seeing what skynet is doing. There are two Skynet::Config options which control logging: Skynet::CONFIG[:SKYNET_LOG_LEVEL] and Skynet::CONFIG[:SKYNET_LOG_FILE]. Skynet::CONFIG[:SKYNET_LOG_LEVEL] is set to Logger::ERROR by default. The available levels are Logger::DEBUG, Logger::INFO, Logger::WARN, Logger::ERROR, and Logger::FATAL. You might try Logger::INFO to see more of what's going on. To use the Skynet::Logger inside your own classes simply add:
110
+
111
+ include SkynetDebugger
112
+
113
+ For more information see SkynetDebugger[link:files/lib/skynet/skynet_debugger_rb.html].
114
+
115
+ == A Note on MapReduce
116
+
117
+ At its simplest level, a MapReduce job defines a data set, a map method and a reduce method. It may also define a partition method. The MapReduce server evenly splits up (partitions) the data given to it and sends those chunks of data, along with a copy of the code in the map method, to workers that execute the map method against the data they were given. The output from each worker is sent back to the MapReduce server. At this point the MapReduce server evenly partitions the RESULT data returned from the workers and sends those chunks of data along with the reduce code to the workers to be executed. The reducers return the final result which is returned to the process that requested the job be done in the first place. Not all jobs need a reduce step; some may just have a map step.
118
118
 
119
- Your map and reduce class methods should ALWAYS assume they are being passed an array. Your map method must always return an array as well.
119
+ The most common example of a MapReduce job is a distributed word counter. Say you wanted to determine how many times a single word appears in a 1GB text file. The MapReduce server would break up the 1GB file into reasonable chunks, say 100 lines per chunk (or partition) and then send each 100-line partition, along with the code that looks for that word, to workers. Each worker would grab its partition of the data, count how many times the word appears in the data and return that number. It might take dozens of workers to complete the task. When the map step is done, you are left with a huge list of counts returned by the workers. In this example, the reduce step would consist of sending that list of counts to another worker, with the code required to sum those counts and finally return the total. In this way a task that used to be done in a linear fashion can be parallelized easily.
120
+
121
+ If you want more details on MapReduce, read Google's paper on it. http://labs.google.com/papers/mapreduce.html
122
+
123
+ When I first read that Google paper some years ago, I was a little confused about what was so unique about it. At the most basic level, it seemed too simple to be revolutionary. So you've got a job with two steps: map and reduce. You put some data in, it gets split out to a map step run on many machines. The returned data gets reshuffled and parceled out to a reduce step run on many machines. All the results are then put together again. You can see it as five steps actually. Data -> Partition -> Map -> Partition -> Reduce. Simple enough. Almost too simple. It was only years later when I began working on Skynet that I realized what the revolutionary part of Google's framework was. It made distributed computing accessible. Any engineer could write a complex distributed system without needing to know about the complexities of such systems. They just write a map function and a reduce function. Also, since the distributed system was generalized, you would only need one class of machines to run ALL of your distributed processing, instead of specialized machines for specialized jobs. That WAS revolutionary.
124
+
125
+ Skynet is merely a distributed computing system that allows you to break your problem into map and reduce steps. You don't have to use it as a MapReduce framework though. You can use it as a simple distributed system, or even a simple asynchronous processing system.
126
+
127
+ There are a number of key differences between Google's MapReduce system and skynet. First, currently you cannot actually send raw code to the workers. You are really only telling it where the code is. At first this bothered me a lot. Then I realized that in most Object Oriented systems, the amount of code you'd need to duplicate and send over the wire to every worker could be ridiculous. For example, if you want to distribute a task you need to run in Rails, you'd have to send almost all of your app and Rails code to every worker with every chunk of data. So, even if you COULD send code, that code would probably eventually just call some other code in your system. If you can't send ALL the code needed for a task, then you might as well just tell Skynet where all the needed code is.
128
+
129
+ The second big difference is that Google's MapReduce framework uses master federator processes to dole out tasks, recombine them, and generally watch the system. Skynet has no such masters. Instead Skynet uses a standard message queue for all communication. That message queue allows workers to watch each other in the same way a master would, but without the single point of failure (except for the queue itself).
120
130
 
121
131
  == CREDITS
132
+
122
133
  There are a number of people who either directly or indirectly worked on Skynet.
123
- John Beppu (wrote the original worker/manager code)
124
- Justin Balthrop
125
- Zack Parker
126
- Amos Elliston
127
- Zack Hobson
128
- Alan Braverman
129
- Mike Stangel
130
- Scott Steadman
131
- Andrew Arrow
132
- Jason Rojas
134
+ * Justin Balthrop
135
+ * Zack Parker
136
+ * Amos Elliston
137
+ * Zack Hobson
138
+ * Alan Braverman
139
+ * Mike Stangel
140
+ * Scott Steadman
141
+ * Andrew Arrow
142
+ * John Beppu (wrote the original worker/manager code)
143
+ * Jason Rojas
133
144
 
134
145
  Skynet was inspired by and heavily influenced by Josh Carter and this blog post.
135
146
  http://multipart-mixed.com/software/simple_mapreduce_in_ruby.html
@@ -20,15 +20,19 @@ class SkynetInstallGenerator < RubiGen::Base
20
20
 
21
21
  def manifest
22
22
  record do |m|
23
+
23
24
  # Ensure appropriate folder(s) exists
24
- m.directory 'script'
25
25
  BASEDIRS.each { |path| m.directory path }
26
26
 
27
27
  # Create stubs
28
- m.template "skynet", "script/skynet", :collision => :ask, :chmod => 0775, :shebang => options[:shebang]
28
+ m.template "skynet_config.rb", "config/skynet_config.rb", :collision => :ask, :chmod => 0655
29
+ if @in_rails
30
+ m.directory 'config/initializers'
31
+ m.template "skynet_initializer.rb", "config/initializers/skynet.rb", :collision => :ask, :chmod => 0655
32
+ end
29
33
  if @mysql
30
- m.template "skynet_schema.sql", "db/skynet_schema.sql", :collision => :ask, :chmod => 0655
31
34
  m.directory 'db/migrate'
35
+ m.template "skynet_mysql_schema.sql", "db/skynet_mysql_schema.sql", :collision => :ask, :chmod => 0655
32
36
  m.migration_template "migration.rb", "db/migrate",
33
37
  :collision => :ask,
34
38
  :assigns => {
@@ -45,7 +49,7 @@ Creates a ...
45
49
 
46
50
  USAGE: #{spec.name} [--rails] [--mysql] directory (can be '.' for current)"
47
51
  Installs:
48
- ./script/skynet
52
+ ./config/skynet_config.rb
49
53
  EOS
50
54
  end
51
55
 
@@ -58,13 +62,16 @@ EOS
58
62
  opts.on("--mysql",
59
63
  "Include mysql migration if you want to use mysql as your message queue.
60
64
  Installs:
61
- ./db/skynet_schema.sql
65
+ ./db/skynet_mysql_schema.sql
62
66
  ./db/migrate/db/migrate/###_create_skynet_tables.rb
63
67
  ") do |mysql|
64
68
  options[:mysql] = true if mysql
65
69
  end
66
70
  opts.on("-r", "--rails",
67
- "Install into rails app",
71
+ "Install into rails app.
72
+ Installs:
73
+ ./config/initializers/skynet.rb
74
+ (If using rails 1, make sure to add require 'skynet' to your environment.rb)",
68
75
  "Default: false") do |rails|
69
76
  options[:rails] = true if rails
70
77
  end
@@ -81,8 +88,7 @@ EOS
81
88
  # Installation skeleton. Intermediate directories are automatically
82
89
  # created so don't sweat their absence here.
83
90
  BASEDIRS = %w(
84
- db
91
+ config
85
92
  log
86
- script
87
93
  )
88
94
  end
@@ -1,25 +1,5 @@
1
1
  class <%= migration_name %> < ActiveRecord::Migration
2
2
  def self.up
3
- create_table :skynet_worker_queues do |t|
4
- t.column :id, "bigint unsigned primary key"
5
- t.column :queue_id, :integer, :default => 0
6
- t.column :created_on, :timestamp
7
- t.column :updated_on, :timestamp
8
- t.column :tasktype, :string
9
- t.column :tasksubtype, :string
10
- t.column :worker_id, 'bigint unsigned'
11
- t.column :hostname, :string
12
- t.column :process_id, :integer
13
- t.column :job_id, 'bigint unsigned'
14
- t.column :task_id, 'bigint unsigned'
15
- t.column :iteration, :integer
16
- t.column :name, :string
17
- t.column :map_or_reduce, :string
18
- t.column :started_at, "decimal(16,4)"
19
- t.column :version, :integer
20
- t.column :processed, :integer
21
- t.column :timeout, "decimal(16,4)"
22
- end
23
3
  create_table :skynet_message_queues do |t|
24
4
  t.column :id, "bigint unsigned primary key"
25
5
  t.column :queue_id, :integer, :default => 0
@@ -29,7 +9,7 @@ class <%= migration_name %> < ActiveRecord::Migration
29
9
  t.column :tasktype, :string
30
10
  t.column :task_id, 'bigint unsigned'
31
11
  t.column :job_id, 'bigint unsigned'
32
- t.column :raw_payload, :text
12
+ t.column :raw_payload, "longtext"
33
13
  t.column :payload_type, :string
34
14
  t.column :name, :string
35
15
  t.column :expiry, :integer
@@ -51,15 +31,12 @@ class <%= migration_name %> < ActiveRecord::Migration
51
31
  add_index :skynet_message_queues, :task_id
52
32
  add_index :skynet_message_queues, :tran_id, :unique => true
53
33
  add_index :skynet_message_queues, [:queue_id,:tasktype,:payload_type,:expire_time], :name => "index_skynet_mqueue_for_take"
54
- add_index :skynet_worker_queues, [:hostname, :process_id]
55
- add_index :skynet_worker_queues, :worker_id, :unique=> true
56
34
  execute "insert into skynet_queue_temperature (queue_id,type) values (0,'master')"
57
35
  execute "insert into skynet_queue_temperature (queue_id,type) values (0,'any')"
58
36
  execute "insert into skynet_queue_temperature (queue_id,type) values (0,'task')"
59
37
  end
60
38
 
61
39
  def self.down
62
- drop_table :skynet_worker_queues
63
40
  drop_table :skynet_queue_temperature
64
41
  drop_table :skynet_message_queues
65
42
  end