wukong 3.0.0.pre → 3.0.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +46 -33
- data/.gitmodules +3 -0
- data/.rspec +1 -1
- data/.travis.yml +8 -1
- data/.yardopts +0 -13
- data/Guardfile +4 -6
- data/{LICENSE.textile → LICENSE.md} +43 -55
- data/README-old.md +422 -0
- data/README.md +279 -418
- data/Rakefile +21 -5
- data/TODO.md +6 -6
- data/bin/wu-clean-encoding +31 -0
- data/bin/wu-lign +2 -2
- data/bin/wu-local +69 -0
- data/bin/wu-server +70 -0
- data/examples/Gemfile +38 -0
- data/examples/README.md +9 -0
- data/examples/dataflow/apache_log_line.rb +64 -25
- data/examples/dataflow/fibonacci_series.rb +101 -0
- data/examples/dataflow/parse_apache_logs.rb +37 -7
- data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
- data/examples/dataflow/simple.rb +4 -4
- data/examples/geo.rb +4 -0
- data/examples/geo/geo_grids.numbers +0 -0
- data/examples/geo/geolocated.rb +331 -0
- data/examples/geo/quadtile.rb +69 -0
- data/examples/geo/spec/geolocated_spec.rb +247 -0
- data/examples/geo/tile_fetcher.rb +77 -0
- data/examples/graph/minimum_spanning_tree.rb +61 -61
- data/examples/jabberwocky.txt +36 -0
- data/examples/models/wikipedia.rb +20 -0
- data/examples/munging/Gemfile +8 -0
- data/examples/munging/airline_flights/airline.rb +57 -0
- data/examples/munging/airline_flights/airline_flights.rake +83 -0
- data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
- data/examples/munging/airline_flights/airport.rb +211 -0
- data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
- data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
- data/examples/munging/airline_flights/flight.rb +156 -0
- data/examples/munging/airline_flights/models.rb +4 -0
- data/examples/munging/airline_flights/parse.rb +26 -0
- data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
- data/examples/munging/airline_flights/route.rb +35 -0
- data/examples/munging/airline_flights/tasks.rake +83 -0
- data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
- data/examples/munging/airline_flights/topcities.rb +167 -0
- data/examples/munging/airports/40_wbans.txt +40 -0
- data/examples/munging/airports/filter_weather_reports.rb +37 -0
- data/examples/munging/airports/join.pig +31 -0
- data/examples/munging/airports/to_tsv.rb +33 -0
- data/examples/munging/airports/usa_wbans.pig +19 -0
- data/examples/munging/airports/usa_wbans.txt +2157 -0
- data/examples/munging/airports/wbans.pig +19 -0
- data/examples/munging/airports/wbans.txt +2310 -0
- data/examples/munging/geo/geo_json.rb +54 -0
- data/examples/munging/geo/geo_models.rb +69 -0
- data/examples/munging/geo/geonames_models.rb +78 -0
- data/examples/munging/geo/iso_codes.rb +172 -0
- data/examples/munging/geo/reconcile_countries.rb +124 -0
- data/examples/munging/geo/tasks.rake +71 -0
- data/examples/munging/rake_helper.rb +62 -0
- data/examples/munging/weather/.gitignore +1 -0
- data/examples/munging/weather/Gemfile +4 -0
- data/examples/munging/weather/Rakefile +28 -0
- data/examples/munging/weather/extract_ish.rb +13 -0
- data/examples/munging/weather/models/weather.rb +119 -0
- data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
- data/examples/munging/wikipedia/README.md +34 -0
- data/examples/munging/wikipedia/Rakefile +193 -0
- data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
- data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
- data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
- data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
- data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
- data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
- data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
- data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
- data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
- data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
- data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
- data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
- data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
- data/examples/munging/wikipedia/pig_style_guide.md +25 -0
- data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
- data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
- data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
- data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
- data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
- data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
- data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
- data/examples/munging/wikipedia/utils/namespaces.json +1 -0
- data/examples/rake_helper.rb +85 -0
- data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/server_logs/logline.rb +95 -0
- data/examples/server_logs/models.rb +66 -0
- data/examples/server_logs/page_counts.pig +48 -0
- data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
- data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
- data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
- data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
- data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
- data/examples/string_reverser.rb +26 -0
- data/examples/text/pig_latin.rb +2 -2
- data/examples/text/regional_flavor/README.md +14 -0
- data/examples/text/regional_flavor/article_wordbags.pig +39 -0
- data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
- data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
- data/examples/word_count/accumulator.rb +26 -0
- data/examples/word_count/tokenizer.rb +13 -0
- data/examples/word_count/word_count.rb +6 -0
- data/examples/workflow/cherry_pie.dot +97 -0
- data/examples/workflow/cherry_pie.png +0 -0
- data/examples/workflow/cherry_pie.rb +61 -26
- data/lib/hanuman.rb +34 -7
- data/lib/hanuman/graph.rb +55 -31
- data/lib/hanuman/graphvizzer.rb +199 -178
- data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
- data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
- data/lib/hanuman/link.rb +35 -0
- data/lib/hanuman/registry.rb +46 -0
- data/lib/hanuman/stage.rb +76 -32
- data/lib/wukong.rb +23 -24
- data/lib/wukong/boot.rb +87 -0
- data/lib/wukong/configuration.rb +8 -0
- data/lib/wukong/dataflow.rb +45 -78
- data/lib/wukong/driver.rb +99 -0
- data/lib/wukong/emitter.rb +22 -0
- data/lib/wukong/model/faker.rb +24 -24
- data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
- data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
- data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
- data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
- data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
- data/lib/wukong/processor.rb +60 -114
- data/lib/wukong/spec_helpers.rb +81 -0
- data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
- data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
- data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
- data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
- data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
- data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
- data/lib/wukong/version.rb +2 -1
- data/lib/wukong/widget/filters.rb +311 -0
- data/lib/wukong/widget/processors.rb +156 -0
- data/lib/wukong/widget/reducers.rb +7 -0
- data/lib/wukong/widget/reducers/accumulator.rb +73 -0
- data/lib/wukong/widget/reducers/bin.rb +318 -0
- data/lib/wukong/widget/reducers/count.rb +61 -0
- data/lib/wukong/widget/reducers/group.rb +85 -0
- data/lib/wukong/widget/reducers/group_concat.rb +70 -0
- data/lib/wukong/widget/reducers/moments.rb +72 -0
- data/lib/wukong/widget/reducers/sort.rb +130 -0
- data/lib/wukong/widget/serializers.rb +287 -0
- data/lib/wukong/widget/sink.rb +10 -52
- data/lib/wukong/widget/source.rb +7 -113
- data/lib/wukong/widget/utils.rb +46 -0
- data/lib/wukong/widgets.rb +6 -0
- data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
- data/spec/examples/dataflow/parsing_spec.rb +12 -11
- data/spec/examples/dataflow/simple_spec.rb +32 -6
- data/spec/examples/dataflow/telegram_spec.rb +36 -36
- data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
- data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
- data/spec/examples/munging/airline_flights_spec.rb +202 -0
- data/spec/examples/text/pig_latin_spec.rb +13 -16
- data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
- data/spec/hanuman/graph_spec.rb +27 -2
- data/spec/hanuman/hanuman_spec.rb +10 -0
- data/spec/hanuman/registry_spec.rb +123 -0
- data/spec/hanuman/stage_spec.rb +61 -7
- data/spec/spec_helper.rb +29 -19
- data/spec/support/hanuman_test_helpers.rb +14 -12
- data/spec/support/shared_context_for_reducers.rb +37 -0
- data/spec/support/shared_examples_for_builders.rb +101 -0
- data/spec/support/shared_examples_for_shortcuts.rb +57 -0
- data/spec/support/wukong_test_helpers.rb +37 -11
- data/spec/wukong/dataflow_spec.rb +77 -55
- data/spec/wukong/local_runner_spec.rb +24 -24
- data/spec/wukong/model/faker_spec.rb +132 -131
- data/spec/wukong/runner_spec.rb +8 -8
- data/spec/wukong/widget/filters_spec.rb +61 -0
- data/spec/wukong/widget/processors_spec.rb +126 -0
- data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
- data/spec/wukong/widget/reducers/count_spec.rb +11 -0
- data/spec/wukong/widget/reducers/group_spec.rb +20 -0
- data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
- data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
- data/spec/wukong/widget/serializers_spec.rb +92 -0
- data/spec/wukong/widget/sink_spec.rb +15 -15
- data/spec/wukong/widget/source_spec.rb +65 -41
- data/spec/wukong/wukong_spec.rb +10 -0
- data/wukong.gemspec +17 -10
- metadata +359 -335
- data/.document +0 -5
- data/VERSION +0 -1
- data/bin/hdp-bin +0 -44
- data/bin/hdp-bzip +0 -23
- data/bin/hdp-cat +0 -3
- data/bin/hdp-catd +0 -3
- data/bin/hdp-cp +0 -3
- data/bin/hdp-du +0 -86
- data/bin/hdp-get +0 -3
- data/bin/hdp-kill +0 -3
- data/bin/hdp-kill-task +0 -3
- data/bin/hdp-ls +0 -11
- data/bin/hdp-mkdir +0 -2
- data/bin/hdp-mkdirp +0 -12
- data/bin/hdp-mv +0 -3
- data/bin/hdp-parts_to_keys.rb +0 -77
- data/bin/hdp-ps +0 -3
- data/bin/hdp-put +0 -3
- data/bin/hdp-rm +0 -32
- data/bin/hdp-sort +0 -40
- data/bin/hdp-stream +0 -40
- data/bin/hdp-stream-flat +0 -22
- data/bin/hdp-stream2 +0 -39
- data/bin/hdp-sync +0 -17
- data/bin/hdp-wc +0 -67
- data/bin/wu-flow +0 -10
- data/bin/wu-map +0 -17
- data/bin/wu-red +0 -17
- data/bin/wukong +0 -17
- data/data/CREDITS.md +0 -355
- data/data/graph/airfares.tsv +0 -2174
- data/data/text/gift_of_the_magi.txt +0 -225
- data/data/text/jabberwocky.txt +0 -36
- data/data/text/rectification_of_names.txt +0 -33
- data/data/twitter/a_atsigns_b.tsv +0 -64
- data/data/twitter/a_follows_b.tsv +0 -53
- data/data/twitter/tweet.tsv +0 -167
- data/data/twitter/twitter_user.tsv +0 -55
- data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
- data/docpages/INSTALL.textile +0 -92
- data/docpages/LICENSE.textile +0 -107
- data/docpages/README-elastic_map_reduce.textile +0 -377
- data/docpages/README-performance.textile +0 -90
- data/docpages/README-wulign.textile +0 -65
- data/docpages/UsingWukong-part1-get_ready.textile +0 -17
- data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
- data/docpages/UsingWukong-part3-parsing.textile +0 -138
- data/docpages/_config.yml +0 -39
- data/docpages/avro/avro_notes.textile +0 -56
- data/docpages/avro/performance.textile +0 -36
- data/docpages/avro/tethering.textile +0 -19
- data/docpages/bigdata-tips.textile +0 -143
- data/docpages/code/api_response_example.txt +0 -20
- data/docpages/code/parser_skeleton.rb +0 -38
- data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
- data/docpages/favicon.ico +0 -0
- data/docpages/gem.css +0 -16
- data/docpages/hadoop-tips.textile +0 -83
- data/docpages/index.textile +0 -92
- data/docpages/intro.textile +0 -8
- data/docpages/moreinfo.textile +0 -174
- data/docpages/news.html +0 -24
- data/docpages/pig/PigLatinExpressionsList.txt +0 -122
- data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
- data/docpages/pig/commandline_params.txt +0 -26
- data/docpages/pig/cookbook.html +0 -481
- data/docpages/pig/images/hadoop-logo.jpg +0 -0
- data/docpages/pig/images/instruction_arrow.png +0 -0
- data/docpages/pig/images/pig-logo.gif +0 -0
- data/docpages/pig/piglatin_ref1.html +0 -1103
- data/docpages/pig/piglatin_ref2.html +0 -14340
- data/docpages/pig/setup.html +0 -505
- data/docpages/pig/skin/basic.css +0 -166
- data/docpages/pig/skin/breadcrumbs.js +0 -237
- data/docpages/pig/skin/fontsize.js +0 -166
- data/docpages/pig/skin/getBlank.js +0 -40
- data/docpages/pig/skin/getMenu.js +0 -45
- data/docpages/pig/skin/images/chapter.gif +0 -0
- data/docpages/pig/skin/images/chapter_open.gif +0 -0
- data/docpages/pig/skin/images/current.gif +0 -0
- data/docpages/pig/skin/images/external-link.gif +0 -0
- data/docpages/pig/skin/images/header_white_line.gif +0 -0
- data/docpages/pig/skin/images/page.gif +0 -0
- data/docpages/pig/skin/images/pdfdoc.gif +0 -0
- data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/print.css +0 -54
- data/docpages/pig/skin/profile.css +0 -181
- data/docpages/pig/skin/screen.css +0 -587
- data/docpages/pig/tutorial.html +0 -1059
- data/docpages/pig/udf.html +0 -1509
- data/docpages/tutorial.textile +0 -283
- data/docpages/usage.textile +0 -195
- data/docpages/wutils.textile +0 -263
- data/examples/dataflow/complex.rb +0 -11
- data/examples/dataflow/donuts.rb +0 -13
- data/examples/tiny_count/jabberwocky_output.tsv +0 -92
- data/examples/word_count.rb +0 -48
- data/examples/workflow/fiddle.rb +0 -24
- data/lib/away/escapement.rb +0 -129
- data/lib/away/exe.rb +0 -11
- data/lib/away/experimental.rb +0 -5
- data/lib/away/from_file.rb +0 -52
- data/lib/away/job.rb +0 -56
- data/lib/away/job/rake_compat.rb +0 -17
- data/lib/away/registry.rb +0 -79
- data/lib/away/runner.rb +0 -276
- data/lib/away/runner/execute.rb +0 -121
- data/lib/away/script.rb +0 -161
- data/lib/away/script/hadoop_command.rb +0 -240
- data/lib/away/source/file_list_source.rb +0 -15
- data/lib/away/source/looper.rb +0 -18
- data/lib/away/task.rb +0 -219
- data/lib/hanuman/action.rb +0 -21
- data/lib/hanuman/chain.rb +0 -4
- data/lib/hanuman/graphviz.rb +0 -74
- data/lib/hanuman/resource.rb +0 -6
- data/lib/hanuman/slot.rb +0 -87
- data/lib/hanuman/slottable.rb +0 -220
- data/lib/wukong/bad_record.rb +0 -15
- data/lib/wukong/event.rb +0 -44
- data/lib/wukong/local_runner.rb +0 -55
- data/lib/wukong/mapred.rb +0 -3
- data/lib/wukong/universe.rb +0 -48
- data/lib/wukong/widget/filter.rb +0 -81
- data/lib/wukong/widget/gibberish.rb +0 -123
- data/lib/wukong/widget/monitor.rb +0 -26
- data/lib/wukong/widget/reducer.rb +0 -66
- data/lib/wukong/widget/stringifier.rb +0 -50
- data/lib/wukong/workflow.rb +0 -22
- data/lib/wukong/workflow/command.rb +0 -42
- data/old/config/emr-example.yaml +0 -48
- data/old/examples/README.txt +0 -17
- data/old/examples/contrib/jeans/README.markdown +0 -165
- data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
- data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
- data/old/examples/contrib/jeans/data/sizes +0 -3
- data/old/examples/contrib/jeans/normalize.rb +0 -20
- data/old/examples/contrib/jeans/sizes.rb +0 -55
- data/old/examples/corpus/bnc_word_freq.rb +0 -44
- data/old/examples/corpus/bucket_counter.rb +0 -47
- data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
- data/old/examples/corpus/sentence_bigrams.rb +0 -53
- data/old/examples/corpus/sentence_coocurrence.rb +0 -66
- data/old/examples/corpus/stopwords.rb +0 -138
- data/old/examples/corpus/words_to_bigrams.rb +0 -53
- data/old/examples/emr/README.textile +0 -110
- data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
- data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
- data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
- data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
- data/old/examples/network_graph/adjacency_list.rb +0 -74
- data/old/examples/network_graph/breadth_first_search.rb +0 -72
- data/old/examples/network_graph/gen_2paths.rb +0 -68
- data/old/examples/network_graph/gen_multi_edge.rb +0 -112
- data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
- data/old/examples/pagerank/README.textile +0 -6
- data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
- data/old/examples/pagerank/pagerank.rb +0 -72
- data/old/examples/pagerank/pagerank_initialize.rb +0 -42
- data/old/examples/pagerank/run_pagerank.sh +0 -21
- data/old/examples/sample_records.rb +0 -33
- data/old/examples/server_logs/apache_log_parser.rb +0 -15
- data/old/examples/server_logs/nook.rb +0 -48
- data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
- data/old/examples/server_logs/user_agent.rb +0 -40
- data/old/examples/simple_word_count.rb +0 -82
- data/old/examples/size.rb +0 -61
- data/old/examples/stats/avg_value_frequency.rb +0 -86
- data/old/examples/stats/binning_percentile_estimator.rb +0 -140
- data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
- data/old/examples/stats/rank_and_bin.rb +0 -173
- data/old/examples/stupidly_simple_filter.rb +0 -40
- data/old/examples/word_count.rb +0 -75
- data/old/graph/graphviz_builder.rb +0 -580
- data/old/graph_easy/Attributes.pm +0 -4181
- data/old/graph_easy/Graphviz.pm +0 -2232
- data/old/wukong.rb +0 -18
- data/old/wukong/and_pig.rb +0 -38
- data/old/wukong/bad_record.rb +0 -18
- data/old/wukong/datatypes.rb +0 -24
- data/old/wukong/datatypes/enum.rb +0 -127
- data/old/wukong/datatypes/fake_types.rb +0 -17
- data/old/wukong/decorator.rb +0 -28
- data/old/wukong/encoding/asciize.rb +0 -108
- data/old/wukong/extensions.rb +0 -16
- data/old/wukong/extensions/array.rb +0 -18
- data/old/wukong/extensions/blank.rb +0 -93
- data/old/wukong/extensions/class.rb +0 -189
- data/old/wukong/extensions/date_time.rb +0 -53
- data/old/wukong/extensions/emittable.rb +0 -69
- data/old/wukong/extensions/enumerable.rb +0 -79
- data/old/wukong/extensions/hash.rb +0 -167
- data/old/wukong/extensions/hash_keys.rb +0 -16
- data/old/wukong/extensions/hash_like.rb +0 -150
- data/old/wukong/extensions/hashlike_class.rb +0 -47
- data/old/wukong/extensions/module.rb +0 -2
- data/old/wukong/extensions/pathname.rb +0 -27
- data/old/wukong/extensions/string.rb +0 -65
- data/old/wukong/extensions/struct.rb +0 -17
- data/old/wukong/extensions/symbol.rb +0 -11
- data/old/wukong/filename_pattern.rb +0 -74
- data/old/wukong/helper.rb +0 -7
- data/old/wukong/helper/stopwords.rb +0 -195
- data/old/wukong/helper/tokenize.rb +0 -35
- data/old/wukong/logger.rb +0 -38
- data/old/wukong/periodic_monitor.rb +0 -72
- data/old/wukong/schema.rb +0 -269
- data/old/wukong/script.rb +0 -286
- data/old/wukong/script/avro_command.rb +0 -5
- data/old/wukong/script/cassandra_loader_script.rb +0 -40
- data/old/wukong/script/emr_command.rb +0 -168
- data/old/wukong/script/hadoop_command.rb +0 -237
- data/old/wukong/script/local_command.rb +0 -41
- data/old/wukong/store.rb +0 -10
- data/old/wukong/store/base.rb +0 -27
- data/old/wukong/store/cassandra.rb +0 -10
- data/old/wukong/store/cassandra/streaming.rb +0 -75
- data/old/wukong/store/cassandra/struct_loader.rb +0 -21
- data/old/wukong/store/cassandra_model.rb +0 -91
- data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
- data/old/wukong/store/chunked_flat_file_store.rb +0 -48
- data/old/wukong/store/conditional_store.rb +0 -57
- data/old/wukong/store/factory.rb +0 -8
- data/old/wukong/store/flat_file_store.rb +0 -89
- data/old/wukong/store/key_store.rb +0 -51
- data/old/wukong/store/null_store.rb +0 -15
- data/old/wukong/store/read_thru_store.rb +0 -22
- data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
- data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
- data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
- data/old/wukong/streamer.rb +0 -30
- data/old/wukong/streamer/accumulating_reducer.rb +0 -83
- data/old/wukong/streamer/base.rb +0 -126
- data/old/wukong/streamer/counting_reducer.rb +0 -25
- data/old/wukong/streamer/filter.rb +0 -20
- data/old/wukong/streamer/instance_streamer.rb +0 -15
- data/old/wukong/streamer/json_streamer.rb +0 -21
- data/old/wukong/streamer/line_streamer.rb +0 -12
- data/old/wukong/streamer/list_reducer.rb +0 -31
- data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
- data/old/wukong/streamer/record_streamer.rb +0 -14
- data/old/wukong/streamer/reducer.rb +0 -11
- data/old/wukong/streamer/set_reducer.rb +0 -14
- data/old/wukong/streamer/struct_streamer.rb +0 -48
- data/old/wukong/streamer/summing_reducer.rb +0 -29
- data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
- data/old/wukong/typed_struct.rb +0 -12
- data/spec/away/encoding_spec.rb +0 -32
- data/spec/away/exe_spec.rb +0 -20
- data/spec/away/flow_spec.rb +0 -82
- data/spec/away/graph_spec.rb +0 -6
- data/spec/away/job_spec.rb +0 -15
- data/spec/away/rake_compat_spec.rb +0 -9
- data/spec/away/script_spec.rb +0 -81
- data/spec/hanuman/graphviz_spec.rb +0 -29
- data/spec/hanuman/slot_spec.rb +0 -2
- data/spec/support/examples_helper.rb +0 -10
- data/spec/support/streamer_test_helpers.rb +0 -6
- data/spec/support/wukong_widget_helpers.rb +0 -66
- data/spec/wukong/processor_spec.rb +0 -109
- data/spec/wukong/widget/filter_spec.rb +0 -99
- data/spec/wukong/widget/stringifier_spec.rb +0 -51
- data/spec/wukong/workflow/command_spec.rb +0 -5
@@ -1,41 +0,0 @@
|
|
1
|
-
module Wukong
|
2
|
-
#
|
3
|
-
# Local execution Options
|
4
|
-
#
|
5
|
-
module LocalCommand
|
6
|
-
|
7
|
-
Settings.define :sort_command, :default => 'sort'
|
8
|
-
Settings.define :sort_args, :default => [], :description => 'Extra params to send to the sort function eg: Settings.sort_args = ["-t", "\t", "-S", "200M"]'
|
9
|
-
|
10
|
-
def execute_local_workflow
|
11
|
-
Log.info " Reading STDIN / Writing STDOUT"
|
12
|
-
execute_command!(local_commandline)
|
13
|
-
end
|
14
|
-
|
15
|
-
# program, including arg, to sort input between mapper and reducer in local
|
16
|
-
# mode. You could override to for example run 'sort -n' (numeric sort).
|
17
|
-
def local_mode_sort_commandline
|
18
|
-
[ Settings.sort_command, Settings.sort_args ].flatten.join(" ")
|
19
|
-
end
|
20
|
-
|
21
|
-
#
|
22
|
-
# Commandline string to execute the job in local mode
|
23
|
-
#
|
24
|
-
# With an input path of '-', just uses $stdin
|
25
|
-
# With an output path of '-', just uses $stdout
|
26
|
-
#
|
27
|
-
def local_commandline
|
28
|
-
@input_paths = input_paths.map(&:strip).join(' ')
|
29
|
-
cmd_input_str = (input_paths == '-') ? "" : "cat '#{input_paths}' | "
|
30
|
-
cmd_output_str = (output_path == '-') ? "" : "> '#{output_path}'"
|
31
|
-
|
32
|
-
if (reducer || options[:reduce_command])
|
33
|
-
%Q{ #{cmd_input_str} #{mapper_commandline} | #{local_mode_sort_commandline} | #{reducer_commandline} #{cmd_output_str} }
|
34
|
-
else
|
35
|
-
%Q{ #{cmd_input_str} #{mapper_commandline} | #{local_mode_sort_commandline} #{cmd_output_str} }
|
36
|
-
end
|
37
|
-
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|
41
|
-
end
|
data/old/wukong/store.rb
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
module Wukong
|
2
|
-
module Store
|
3
|
-
autoload :Base, 'wukong/store/base'
|
4
|
-
autoload :FlatFileStore, 'wukong/store/flat_file_store'
|
5
|
-
autoload :ChunkedFlatFileStore, 'wukong/store/chunked_flat_file_store'
|
6
|
-
autoload :ChhChunkedFlatFileStore, 'wukong/store/chh_chunked_flat_file_store'
|
7
|
-
|
8
|
-
autoload :CassandraModel, 'wukong/store/cassandra_model'
|
9
|
-
end
|
10
|
-
end
|
data/old/wukong/store/base.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
module Wukong
|
2
|
-
module Store
|
3
|
-
class Base
|
4
|
-
def initialize options={}
|
5
|
-
Log.info "Creating #{self.class} with #{options.inspect}"
|
6
|
-
end
|
7
|
-
|
8
|
-
#Iterate through each object casting it as a new object of klass.
|
9
|
-
def each_as klass, &block
|
10
|
-
self.each do |*args|
|
11
|
-
begin
|
12
|
-
item = klass.new *args[1..-1]
|
13
|
-
rescue StandardError => e
|
14
|
-
Log.info [args, e.to_s, self].join("\t")
|
15
|
-
raise e
|
16
|
-
end
|
17
|
-
yield item
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def log_line
|
22
|
-
nil
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
@@ -1,10 +0,0 @@
|
|
1
|
-
Settings.define :cassandra_hosts, :default => '127.0.0.1:9160', :type => Array, :description => 'Comma-delimited list of hostname:port addresses for the Cassandra database holding Twitter API objects'
|
2
|
-
Settings.define :cassandra_keyspace, :default => 'soc_net_tw', :description => 'Cassandra keyspace for Twitter objects'
|
3
|
-
|
4
|
-
module Wukong
|
5
|
-
module Store
|
6
|
-
module CassandraStore
|
7
|
-
autoload :StructLoader, 'wukong/store/cassandra/struct_loader'
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
@@ -1,75 +0,0 @@
|
|
1
|
-
require 'avro'
|
2
|
-
|
3
|
-
Settings.define :cassandra_avro_schema, :default => ('/usr/local/share/cassandra/interface/avro/cassandra.avpr')
|
4
|
-
module Wukong::Store::CassandraModel
|
5
|
-
|
6
|
-
#
|
7
|
-
# Store model using avro writer
|
8
|
-
#
|
9
|
-
def streaming_save
|
10
|
-
self.class.streaming_insert id, self
|
11
|
-
end
|
12
|
-
module ClassMethods
|
13
|
-
|
14
|
-
def streaming_writer
|
15
|
-
@streaming_writer ||= AvroWriter.new
|
16
|
-
end
|
17
|
-
|
18
|
-
#
|
19
|
-
# Use avro and stream into cassandra
|
20
|
-
#
|
21
|
-
def streaming_insert id, hsh
|
22
|
-
streaming_writer.put(id.to_s, hsh.to_db_hash)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
class AvroWriter
|
26
|
-
#
|
27
|
-
# Reads in the protocol schema
|
28
|
-
# creates the necessary encoder and writer.
|
29
|
-
#
|
30
|
-
def initialize
|
31
|
-
schema_file = Settings.cassandra_avro_schema
|
32
|
-
@proto = Avro::Protocol.parse(File.read(schema_file))
|
33
|
-
@schema = @proto.types.detect{|schema| schema.name == 'StreamingMutation'}
|
34
|
-
@enc = Avro::IO::BinaryEncoder.new($stdout)
|
35
|
-
# @enc = DummyEncoder.new($stdout)
|
36
|
-
@writer = Avro::IO::DatumWriter.new(@schema)
|
37
|
-
# warn [@schema, @enc].inspect
|
38
|
-
end
|
39
|
-
|
40
|
-
def write key, col_name, value
|
41
|
-
@writer.write(smutation(key, col_name, value), @enc)
|
42
|
-
end
|
43
|
-
|
44
|
-
def write_directly key, col_name, value, timestamp, ttl
|
45
|
-
# Log.info "Insert(row_key => #{key}, col_name => #{col_name}, value => #{value}"
|
46
|
-
@enc.write_bytes(key)
|
47
|
-
@enc.write_bytes(col_name)
|
48
|
-
@enc.write_bytes(value)
|
49
|
-
@enc.write_long(timestamp)
|
50
|
-
@enc.write_int(ttl)
|
51
|
-
end
|
52
|
-
|
53
|
-
#
|
54
|
-
# Iterate through each key value pair in the hash to
|
55
|
-
# be inserted and write directly one at a time
|
56
|
-
#
|
57
|
-
def put id, hsh, timestamp=nil, ttl=0
|
58
|
-
timestamp ||= Time.now.to_i
|
59
|
-
hsh.each do |attr, val|
|
60
|
-
write_directly(id, attr, val, timestamp, ttl)
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
def smutation key, name, value
|
65
|
-
{
|
66
|
-
'key' => key,
|
67
|
-
'name' => name.to_s,
|
68
|
-
'value' => value.to_s,
|
69
|
-
'timestamp' => Time.epoch_microseconds,
|
70
|
-
'ttl' => 0
|
71
|
-
}
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
require 'avro'
|
2
|
-
|
3
|
-
Settings.define :cassandra_avro_schema, :default => ('/usr/local/share/cassandra/interface/avro/cassandra.avpr')
|
4
|
-
|
5
|
-
module Wukong::Store::Cassandra
|
6
|
-
class StructLoader < Wukong::Streamer::StructStreamer
|
7
|
-
def initialize *args
|
8
|
-
super(*args)
|
9
|
-
@log = PeriodicMonitor.new
|
10
|
-
end
|
11
|
-
|
12
|
-
#
|
13
|
-
# Blindly expects objects streaming by to have a "streaming_save" method
|
14
|
-
#
|
15
|
-
def process object, *_
|
16
|
-
# object.save
|
17
|
-
object.streaming_save
|
18
|
-
@log.periodically(object.to_flat)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
@@ -1,91 +0,0 @@
|
|
1
|
-
module Wukong
|
2
|
-
module Store
|
3
|
-
#
|
4
|
-
# Barebones interface between a wukong class and a cassandra database
#
# Class must somehow provide a class-level cassandra_db accessor
# that sets the @cassandra_db instance variable.
#
# The including class is also expected to supply (none are defined here):
# * an instance-level +key+ and +each_pair+ (used by #save / #to_db_hash)
# * class-level +class_basename+ and +from_hash+
#
module CassandraModel
  #
  # Store model to the DB
  #
  # Hands this object's +key+ and its flattened attributes (see #to_db_hash)
  # to the class-level +insert+.
  #
  def save
    self.class.insert(key, to_db_hash)
  end

  #
  # Flatten attributes for storage in the DB.
  #
  # * omits elements whose value is nil
  # * calls to_s on everything else
  # * This means that blank strings are preserved;
  # * and that false is saved as 'false'
  #
  # Override if you think something fancier than that should happen.
  #
  # Returns a new Hash with String keys and String values.
  #
  def to_db_hash
    db_hsh = {}
    each_pair { |k, v| db_hsh[k.to_s] = v.to_s unless v.nil? }
    db_hsh
  end

  module ClassMethods
    # Cassandra column family -- taken from the class name by default.
    def table_name
      class_basename
    end

    # Override to control how your class is instantiated from the DB hash
    def from_db_hash(*args)
      from_hash(*args)
    end

    # Insert into the cassandra database
    # uses object's #to_db_hash method
    #
    # Only the first element of +args+ (the attribute hash) is used; the key
    # is stringified before being handed to cassandra_db.
    def insert(key, *args)
      hsh = args.first
      cassandra_db.insert(table_name, key.to_s, hsh)
    end

    # Load from the cassandra database
    # calls out to object's #from_db_hash method
    #
    # Reads from +table_name+ -- the same column family #insert writes to --
    # so a class that overrides +table_name+ round-trips correctly.
    # Returns nil when cassandra_db.get returns nothing for the key.
    def load(key)
      hsh = cassandra_db.get(table_name, key.to_s)
      from_db_hash(hsh) if hsh
    end

    # invalidates cassandra connection on errors where that makes sense.
    #
    # Warns with the failed action, the error and its backtrace, clears the
    # class-level @cassandra_db, then pauses briefly.
    def handle_error(action, e)
      # An exception that was never raised has a nil backtrace.
      backtrace = Array(e.backtrace).join("\t")
      warn "#{action} failed: #{e} #{backtrace}"
      @cassandra_db = nil
      sleep 0.2
    end
  end

  # The standard 'inject class methods when module is included' trick
  def self.included(base)
    base.class_eval { extend ClassMethods }
  end
end
|
71
|
-
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
Hash.class_eval do
  #
  # Flatten attributes for storage in the DB.
  #
  # * omits elements whose value is nil
  # * calls to_s on everything else
  # * This means that blank strings are preserved;
  # * and that false is saved as 'false'
  #
  # Override if you think something fancier than that should happen.
  #
  # Returns a new Hash with String keys and String values; the receiver
  # is left untouched.
  #
  def to_db_hash
    to_hash.each_with_object({}) do |(k, v), db_hsh|
      db_hsh[k.to_s] = v.to_s unless v.nil?
    end
  end
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
module Wukong
  module Store
    #
    # Flat file store whose filename is produced from a FilenamePattern, and
    # which can be rotated onto a freshly generated filename with #new_chunk.
    #
    class ChhChunkedFlatFileStore < Wukong::Store::FlatFileStore
      attr_accessor :filename_pattern, :handle, :rootdir

      # Move to configliere
      # NOTE(review): ChunkedFlatFileStore defines the same setting names with
      # different defaults -- confirm which definition should win.
      Settings.define :chunk_file_pattern, :default => ":rootdir/:date/:handle:timestamp-:pid.tsv",:description => "The pattern for chunked files."
      Settings.define :chunk_file_rootdir, :default => nil, :description => "The root directory for the chunked files."

      # Note that filemode is inherited from flat_file

      #
      # Recognized options (each falls back as shown):
      # * :rootdir -- else Settings[:chunk_file_rootdir]
      # * :handle  -- passed to the filename pattern
      # * :pattern -- else Settings[:chunk_file_pattern]
      #
      # The caller's +options+ hash is not modified; a copy carrying the
      # generated :filename is handed to the superclass.
      #
      def initialize(options = {})
        # super wants a :filename in the options or it will fail. We need to get the initial filename
        # set up before we call super, so we need all of the parts of the pattern set up.
        self.rootdir = options[:rootdir] || Settings[:chunk_file_rootdir]
        self.handle  = options[:handle]
        pattern      = options[:pattern] || Settings[:chunk_file_pattern]
        self.filename_pattern = FilenamePattern.new(pattern, :handle => handle, :rootdir => rootdir)

        super(options.merge(:filename => filename_pattern.make))

        mkdir!
      end

      #
      # Rotate to a new chunk: generate the next filename, flush and close the
      # current file, point the store at the new filename and make sure its
      # directory exists.
      #
      def new_chunk
        new_filename = filename_pattern.make
        Log.info "Rotating chunked file #{filename} into #{new_filename}"
        flush
        close
        @filename = new_filename
        mkdir!
      end

    end
  end
end
|
@@ -1,48 +0,0 @@
|
|
1
|
-
require 'wukong/monitor/periodic_monitor'
|
2
|
-
module Wukong
  module Store
    #
    # Flat file store that writes to a pattern-generated filename and rotates
    # onto a new file via a PeriodicMonitor checked on every #save.
    #
    class ChunkedFlatFileStore < Wukong::Store::FlatFileStore
      attr_accessor :filename_pattern, :chunk_monitor, :handle, :chunktime, :rootdir

      # Move to configliere
      Settings.define :chunk_file_pattern, :default => ":rootdir/:date/:handle-:timestamp-:pid.tsv",:description => "The pattern for chunked files."
      Settings.define :chunk_file_interval, :default => 4*60*60, :description => "The time interval to keep a chunk file open."
      Settings.define :chunk_file_rootdir, :default => '/tmp', :description => "The root directory for the chunked files."

      # Note that filemode is inherited from flat_file

      #
      # Recognized options (each falls back as shown):
      # * :interval -- else Settings[:chunk_file_interval]
      # * :rootdir  -- else Settings[:chunk_file_rootdir]
      # * :handle   -- passed to the filename pattern
      # * :pattern  -- else Settings[:chunk_file_pattern]
      # * :filemode -- else 'a'
      #
      # The caller's +options+ hash is not modified; a copy carrying the
      # generated :filename and the effective :filemode goes to the superclass.
      #
      def initialize(options = {})
        # super wants a :filename in the options or it will fail. We need to get the initial filename
        # set up before we call super, so we need all of the parts of the pattern set up.
        self.chunktime = options[:interval] || Settings[:chunk_file_interval]
        self.rootdir   = options[:rootdir]  || Settings[:chunk_file_rootdir]
        self.handle    = options[:handle]
        pattern        = options[:pattern]  || Settings[:chunk_file_pattern]
        self.filename_pattern = FilenamePattern.new(pattern, :handle => handle, :rootdir => rootdir)
        initial_filename = filename_pattern.make
        Log.warn "You don't really want a chunk time this small: #{self.chunktime}" unless self.chunktime > 600
        self.chunk_monitor = Wukong::Monitor::PeriodicMonitor.new( :time => self.chunktime )

        super(options.merge(:filename => initial_filename,
                            :filemode => (options[:filemode] || 'a')))
        mkdir!
      end

      #
      # Rotate to a new chunk: generate the next filename, flush and close the
      # current file, point the store at the new filename and make sure its
      # directory exists.
      #
      def new_chunk!
        new_filename = filename_pattern.make
        Log.info "Rotating chunked file #{filename} into #{new_filename}"
        flush
        close
        @filename = new_filename
        mkdir!
      end

      #
      # Save via the superclass, then give the chunk monitor a chance to rotate
      # the file (presumably once per +chunktime+ -- confirm against
      # PeriodicMonitor). Returns whatever the superclass #save returned.
      #
      def save(*args)
        result = super(*args)
        chunk_monitor.periodically { new_chunk! }
        result
      end

    end
  end
end
|
@@ -1,57 +0,0 @@
|
|
1
|
-
module Monkeyshines
  module Store
    #
    # Pairs a key cache with a backing store: a value is computed and written
    # to the store only when its key is not already present in the cache.
    #
    class ConditionalStore < Monkeyshines::Store::Base
      attr_accessor :options, :cache, :store, :misses

      DEFAULT_OPTIONS = {
        :cache => { :type => :tyrant_rdb_key_store },
        :store => { :type => :chunked_flat_file_store },
      }

      #
      # +cache+ must behave like a hash (Hash and
      # Monkeyshines::Store::TyrantRdbKeyStore are both cromulent
      # choices).
      #
      # +_options+ is deep-merged over DEFAULT_OPTIONS; its :cache and :store
      # entries are each handed to Monkeyshines::Store.create.
      #
      def initialize(_options)
        self.options = DEFAULT_OPTIONS.deep_merge(_options)
        self.cache   = Monkeyshines::Store.create(options[:cache])
        self.store   = Monkeyshines::Store.create(options[:store])
        self.misses  = 0
      end

      #
      # If key is absent, save the result of calling the block.
      # If key is present, block is never called.
      #
      # The block must return a [cache_val, store_val] pair. Nothing is
      # recorded when cache_val is nil or false. Passing a truthy +force+
      # skips the cache check. Returns store_val when something was saved,
      # nil otherwise.
      #
      # Ex:
      #   rt_store.set(url) do
      #     fetcher.get url # will only be called if url isn't in rt_store
      #   end
      #
      def set(key, force = nil, &block)
        return if (!force) && cache.include?(key)
        cache_val, store_val = block.call
        return unless cache_val
        cache.set_nr key, cache_val # update cache
        store << store_val          # save value
        self.misses += 1            # track the cache miss
        store_val
      end

      # Number of entries, as reported by the cache.
      def size
        cache.size
      end

      # Fragments for a progress log: the cache size and a formatted miss count.
      def log_line
        [size, "%8d misses" % misses]
      end

      #
      # Close both the cache and the backing store. The store is closed even
      # if closing the cache raises, so a cache failure cannot leak the store.
      # Returns the result of store.close.
      #
      def close
        begin
          cache.close
        ensure
          store_result = store.close
        end
        store_result
      end
    end
  end
end
|
data/old/wukong/store/factory.rb
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
require 'fileutils'; include FileUtils
|
2
|
-
|
3
|
-
module Wukong
  module Store
    #
    # Store backed by a single file of tab-separated lines.
    #
    class FlatFileStore < Store::Base
      attr_accessor :filename, :filemode

      #
      # Options read here:
      # * :filename -- required; raises if missing
      # * :filemode -- mode string given to File.open, 'r' when absent
      # * :skip     -- when given, that many lines are read and discarded
      #
      def initialize(options = {})
        super(options)
        self.filename = options[:filename]
        raise "Missing filename in #{self.class}" unless options[:filename]
        self.filemode = options[:filemode] || 'r'
        skip!(options[:skip]) if options[:skip]
      end

      #
      # Yield the tab-separated fields of each line in the file, passing
      # over any line whose field list is blank.
      #
      def each(&block)
        file.each do |line|
          fields = line.chomp.split("\t")
          yield(*fields) unless fields.blank?
        end
      end

      #
      # Read ahead n_lines lines in the file
      #
      def skip!(n_lines)
        Log.info "Skipping #{n_lines} in #{self.class}:#{filename}"
        n_lines.times { file.readline }
      end

      #
      # The underlying File handle, opened on first use with the configured
      # filename and filemode and reused afterwards.
      #
      def file
        @file ||= begin
          Log.info "Opening file #{filename} with mode #{filemode}"
          File.open(filename, filemode)
        end
      end

      # Close the dump file and forget the handle, so the next #file call reopens it
      def close
        if @file
          @file.close
        end
        @file = nil
      end

      # Flush pending writes, if the file has been opened
      def flush
        if @file
          @file.flush
        end
      end

      # Ensure the file's directory exists
      def mkdir!
        dir = File.dirname(filename)
        unless File.directory?(dir)
          Log.info "Making directory #{dir}"
          FileUtils.mkdir_p dir
        end
      end

      # write to the file; returns the object that was written
      def save(obj)
        file.puts(obj)
        obj
      end

      # returns the size of the current file, or 0 if it has not been opened
      def size
        @file ? File.size(filename) : 0
      end

      # delegates to +#save+ -- writes the object to the file. Returns self for chaining on the stream.
      def <<(obj)
        save(obj)
        self
      end

    end
  end
end
|
89
|
-
|