rake_hdfs 0.1.1

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 1310a15bf3d66d81d1b6b51ff81c72222369100d
+   data.tar.gz: b59b9a58c2baac58adf6a0476607c4252b5a3b71
+ SHA512:
+   metadata.gz: 335720d67d9b386cf63945bb36819b745be82ab1c8d9e6f1c3e96c85f36c93d9b0b1772bbac70a6fadc500bb0487fe8df5d05860a6360a38dc904f00b6f8bd76
+   data.tar.gz: 75311d15ac8fe3d460f3644ff32623ccb8242ec46764ae61c98bd206f82f90f224a08068227aa79f8de05aa3b6be13f6de721180d8fc615cced466b2146d64b0
data/.gitignore ADDED
@@ -0,0 +1,9 @@
+ /.bundle/
+ /.yardoc
+ /Gemfile.lock
+ /_yardoc/
+ /coverage/
+ /doc/
+ /pkg/
+ /spec/reports/
+ /tmp/
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
+ language: ruby
+ rvm:
+   - 2.3.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in rake_hdfs.gemspec
+ gemspec
data/README.md ADDED
@@ -0,0 +1,71 @@
+ # RakeHdfs
+
+ This gem lets Rake file tasks target the HDFS file system. It is built on top of the webhdfs gem.
+
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+ ```ruby
+ gem 'rake_hdfs'
+ ```
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install rake_hdfs
+
+ ## Usage
+
+ ```ruby
+ require "webhdfs/fileutils"
+ require "rake_hdfs"
+ $dir = "/user/chenkovsky.chen"
+
+ WebHDFS::FileUtils.set_server("localhost", 50070, "chenkovsky", nil)
+
+ hdirectory "#{$dir}/tmp_dir"
+ desc "test hdfs rake"
+ hfile "#{$dir}/tmp_dir/tmp.txt" => ["#{$dir}/tmp_dir"] do
+   raise "tmp file should not exist." if hexist? "#{$dir}/tmp_dir/tmp.txt"
+   files = hls $dir
+   puts files
+   dir_mtime = hmtime $dir
+   puts dir_mtime
+   hcopy_from_local "tmp.txt", "#{$dir}/tmp_dir/tmp.txt"
+   hcopy_from_local_via_stream "tmp.txt", "#{$dir}/tmp_dir/tmp2.txt"
+
+   hcopy_to_local "#{$dir}/tmp_dir/tmp.txt", "tmp3.txt"
+
+   happend("#{$dir}/tmp_dir/tmp2.txt", "hahaha")
+
+   hmkdir "#{$dir}/tmp2_dir"
+
+   raise "tmp2_dir should exist." unless hexist? "#{$dir}/tmp2_dir"
+
+   hrm "#{$dir}/tmp2_dir"
+   hmkdir "#{$dir}/tmp3_dir"
+   hcopy_from_local "tmp.txt", "#{$dir}/tmp3_dir/tmp.txt"
+
+   hrmr "#{$dir}/tmp3_dir"
+
+   hrename "#{$dir}/tmp_dir/tmp2.txt", "#{$dir}/tmp_dir/tmp4.txt"
+
+   hchmod 0755, "#{$dir}/tmp_dir/tmp4.txt"
+
+   puts hstat("#{$dir}/tmp_dir/tmp4.txt")
+   raise "not correct uptodate" unless huptodate? "#{$dir}/tmp_dir/tmp4.txt", ["#{$dir}/tmp_dir/tmp.txt"]
+ end
+ ```
+
+ ## Contributing
+
+ 1. Fork it ( https://github.com/chenkovsky/rake_hdfs/fork )
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
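The `hfile` and `hdirectory` targets in the Usage block above are ordinary Rake tasks named by their HDFS paths, so they can be wired into the rest of a build. A hypothetical addition to that Rakefile (reusing its `$dir` variable; the `:clean_hdfs` task name is illustrative, not part of the gem):

```ruby
# Hypothetical extension of the README's Rakefile: a bare `rake` builds the
# HDFS target, and `rake clean_hdfs` removes the generated directory.
task default: "#{$dir}/tmp_dir/tmp.txt"

desc "remove the generated HDFS directory"
task :clean_hdfs do
  hrmr "#{$dir}/tmp_dir" if hexist? "#{$dir}/tmp_dir"
end
```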
data/Rakefile ADDED
@@ -0,0 +1,3 @@
+ require "bundler/gem_tasks"
+
+ gem "webhdfs"
data/lib/rake_hdfs/hdfs_dsl_definition.rb ADDED
@@ -0,0 +1,24 @@
+ require 'rake/file_utils_ext'
+ module Rake
+   module DSL
+     def hfile(*args, &block) # :doc:
+       Rake::HDFSFileTask.define_task(*args, &block)
+     end
+
+     def hfile_create(*args, &block)
+       Rake::HDFSFileCreationTask.define_task(*args, &block)
+     end
+
+     def hdirectory(*args, &block) # :doc:
+       result = hfile_create(*args, &block)
+       dir, _ = *Rake.application.resolve_args(args)
+       dir = Rake.from_pathname(dir)
+       Rake.each_dir_parent(dir) do |d|
+         hfile_create d do |t|
+           hmkdir_p t.name unless WebHDFS::FileUtils.exist?(t.name)
+         end
+       end
+       result
+     end
+   end
+ end
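`hfile` and `hdirectory` are the HDFS counterparts of Rake's built-in `file` and `directory` methods, backed by the task classes defined in the next two files. A minimal Rakefile sketch, assuming a reachable WebHDFS endpoint (hostname, user, and paths here are placeholders, not part of the gem):

```ruby
# Minimal sketch; server, user, and paths are illustrative placeholders.
require "webhdfs/fileutils"
require "rake_hdfs"

WebHDFS::FileUtils.set_server("namenode.example.com", 50070, "alice", nil)

# Like Rake's `directory`: creates the HDFS path (and each missing parent)
# only if it does not already exist.
hdirectory "/user/alice/out"

# Like Rake's `file`: the block runs when the HDFS target is missing or
# older than its prerequisites.
hfile "/user/alice/out/report.txt" => ["/user/alice/out"] do |t|
  hcopy_from_local "report.txt", t.name
end
```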
data/lib/rake_hdfs/hdfs_file_creation_task.rb ADDED
@@ -0,0 +1,23 @@
+ require 'rake/file_task'
+ require 'rake/early_time'
+
+ module Rake
+
+   # An HDFSFileCreationTask is a file task that, when used as a dependency,
+   # is needed if and only if the HDFS file has not been created. Once created,
+   # it is not re-triggered if any of its dependencies are newer, nor does it
+   # trigger rebuilds of tasks that depend on it when it is updated.
+   #
+   class HDFSFileCreationTask < FileTask
+     # Is this file task needed? Yes if the HDFS path doesn't exist.
+     def needed?
+       ! WebHDFS::FileUtils.exist?(name)
+     end
+
+     # Time stamp for a file creation task. This time stamp is earlier
+     # than any other time stamp.
+     def timestamp
+       Rake::EARLY
+     end
+   end
+ end
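Because `timestamp` always returns `Rake::EARLY` and `needed?` only checks existence, a directory created with `hdirectory` never makes the files inside it look out of date. A small illustrative check, assuming `set_server` has already been called ("/user/alice/out" is a placeholder path):

```ruby
# Illustrative only; "/user/alice/out" stands in for any existing HDFS directory.
dir_task = Rake::HDFSFileCreationTask.define_task("/user/alice/out")

dir_task.needed?    # => false once the directory exists, so its block won't rerun
dir_task.timestamp  # => Rake::EARLY, earlier than any real mtime, so tasks that
                    #    depend on it are never considered stale because of it
```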
data/lib/rake_hdfs/hdfs_file_task.rb ADDED
@@ -0,0 +1,46 @@
+ require 'rake/task.rb'
+ require 'rake/early_time'
+
+ module Rake
+
+   # An HDFSFileTask is a task with time-based dependencies. If any of the
+   # task's prerequisites have a timestamp later than the HDFS file
+   # represented by this task, then the file must be rebuilt (using the
+   # supplied actions).
+   #
+   class HDFSFileTask < Task
+
+     # Is this file task needed? Yes if it doesn't exist, or if its time stamp
+     # is out of date.
+     def needed?
+       ! WebHDFS::FileUtils.exist?(name) || out_of_date?(timestamp) || @application.options.build_all
+     end
+
+     # Time stamp for the file task.
+     def timestamp
+       if WebHDFS::FileUtils.exist?(name)
+         WebHDFS::FileUtils.mtime(name.to_s)
+       else
+         Rake::LATE
+       end
+     end
+
+     private
+
+     # Are there any prerequisites with a later time than the given time stamp?
+     def out_of_date?(stamp)
+       @prerequisites.any? { |n| application[n, @scope].timestamp > stamp }
+     end
+
+     # ----------------------------------------------------------------
+     # Task class methods.
+     #
+     class << self
+       # Apply the scope to the task name according to the rules for this kind
+       # of task. File-based tasks ignore the scope when creating the name.
+       def scope_name(scope, task_name)
+         Rake.from_pathname(task_name)
+       end
+     end
+   end
+ end
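The rebuild rule mirrors Rake's own `FileTask`, with WebHDFS calls standing in for local `File` checks. A rough standalone model of that decision (not the gem's API, just an illustration; it assumes `set_server` has already been called):

```ruby
# Rough model of HDFSFileTask#needed?: rebuild when the target is missing
# on HDFS or any prerequisite is newer than it.
def rebuild_needed?(target, prerequisites)
  return true unless WebHDFS::FileUtils.exist?(target)
  target_mtime = WebHDFS::FileUtils.mtime(target)  # "modificationTime", epoch ms
  prerequisites.any? { |p| WebHDFS::FileUtils.mtime(p) > target_mtime }
end
```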
data/lib/rake_hdfs/hdfs_fileutils.rb ADDED
@@ -0,0 +1,203 @@
+ require "webhdfs/fileutils"
+ module WebHDFS
+   module FileUtils
+     def exist?(file, options={})
+       begin
+         client.stat(file, options)
+         return true
+       rescue FileNotFoundError => e
+         return false
+       end
+     end
+     module_function :exist?
+     def ls(path, options={})
+       opts = options.dup
+       fu_log "ls #{path}" if opts.delete(:verbose)
+       client.list(path, options)
+     end
+     module_function :ls
+
+     def stat(path, options={})
+       opts = options.dup
+       fu_log "stat #{path}" if opts.delete(:verbose)
+       client.stat(path, options)
+     end
+     module_function :stat
+
+     def uptodate?(new, old_list, options = {})
+       return false unless exist?(new)
+       new_time = mtime(new)
+       old_list.each do |old|
+         if exist?(old)
+           return false unless new_time > mtime(old)
+         end
+       end
+       true
+     end
+     module_function :uptodate?
+
+     def mtime(path, options = {})
+       file_stat = stat(path, options)
+       file_stat["modificationTime"]
+     end
+     module_function :mtime
+   end
+ end
+
+ module FileUtils
+   def hdfs_copy_from_local(file, path, options={})
+     WebHDFS::FileUtils.copy_from_local(file, path, options)
+   end
+   module_function :hdfs_copy_from_local
+
+   alias :hcopy_from_local :hdfs_copy_from_local
+   module_function :hcopy_from_local
+
+   def hdfs_copy_from_local_via_stream(file, path, options={})
+     WebHDFS::FileUtils.copy_from_local_via_stream(file, path, options)
+   end
+   module_function :hdfs_copy_from_local_via_stream
+   alias :hcopy_from_local_via_stream :hdfs_copy_from_local_via_stream
+   module_function :hcopy_from_local_via_stream
+
+   def hdfs_copy_to_local(path, file, options={})
+     WebHDFS::FileUtils.copy_to_local(path, file, options)
+   end
+
+   module_function :hdfs_copy_to_local
+
+   alias :hcopy_to_local :hdfs_copy_to_local
+   module_function :hcopy_to_local
+
+   def hdfs_append(path, body, options={})
+     WebHDFS::FileUtils.append(path, body, options)
+   end
+   module_function :hdfs_append
+
+   alias :happend :hdfs_append
+   module_function :happend
+
+   def hdfs_mkdir(list, options={})
+     WebHDFS::FileUtils.mkdir(list, options)
+   end
+
+   module_function :hdfs_mkdir
+
+   alias :hmkdir :hdfs_mkdir
+   module_function :hmkdir
+
+   alias :hdfs_mkdir_p :hdfs_mkdir
+   module_function :hdfs_mkdir_p
+
+   alias :hmkdir_p :hdfs_mkdir
+   module_function :hmkdir_p
+
+   def hdfs_rm(list, options={})
+     WebHDFS::FileUtils.rm(list, options)
+   end
+   module_function :hdfs_rm
+   alias :hrm :hdfs_rm
+   module_function :hrm
+
+   def hdfs_rmr(list, options={})
+     WebHDFS::FileUtils.rmr(list, options)
+   end
+   module_function :hdfs_rmr
+
+   alias :hrmr :hdfs_rmr
+   module_function :hrmr
+
+   def hdfs_rename(src, dst, options={})
+     WebHDFS::FileUtils.rename(src, dst, options)
+   end
+
+   module_function :hdfs_rename
+
+   alias :hrename :hdfs_rename
+   module_function :hrename
+
+   def hdfs_chmod(mode, list, options={})
+     WebHDFS::FileUtils.chmod(mode, list, options)
+   end
+
+   module_function :hdfs_chmod
+
+   alias :hchmod :hdfs_chmod
+   module_function :hchmod
+
+   def hdfs_chown(user, group, list, options={})
+     WebHDFS::FileUtils.chown(user, group, list, options)
+   end
+
+   module_function :hdfs_chown
+
+   alias :hchown :hdfs_chown
+   module_function :hchown
+
+   def hdfs_set_repl_factor(list, num, options={})
+     WebHDFS::FileUtils.set_repl_factor(list, num, options)
+   end
+   module_function :hdfs_set_repl_factor
+
+   alias :hset_repl_factor :hdfs_set_repl_factor
+   module_function :hset_repl_factor
+
+   def hdfs_set_atime(list, time, options={})
+     WebHDFS::FileUtils.set_atime(list, time, options)
+   end
+
+   module_function :hdfs_set_atime
+
+   alias :hset_atime :hdfs_set_atime
+   module_function :hset_atime
+
+   def hdfs_set_mtime(list, time, options={})
+     WebHDFS::FileUtils.set_mtime(list, time, options)
+   end
+
+   module_function :hdfs_set_mtime
+
+   alias :hset_mtime :hdfs_set_mtime
+   module_function :hset_mtime
+
+   def hdfs_ls(path, options={})
+     WebHDFS::FileUtils.ls(path, options)
+   end
+
+   module_function :hdfs_ls
+   alias :hls :hdfs_ls
+   module_function :hls
+
+   def hdfs_stat(path, options={})
+     WebHDFS::FileUtils.stat(path, options)
+   end
+
+   module_function :hdfs_stat
+   alias :hstat :hdfs_stat
+   module_function :hstat
+
+   def hdfs_exist?(path, options={})
+     WebHDFS::FileUtils.exist?(path, options)
+   end
+
+   module_function :hdfs_exist?
+   alias :hexist? :hdfs_exist?
+   module_function :hexist?
+
+   def hdfs_uptodate?(new, old_list, options={})
+     WebHDFS::FileUtils.uptodate?(new, old_list, options)
+   end
+
+   module_function :hdfs_uptodate?
+   alias :huptodate? :hdfs_uptodate?
+   module_function :huptodate?
+
+   def hdfs_mtime(path, options={})
+     WebHDFS::FileUtils.mtime(path, options)
+   end
+
+   module_function :hdfs_mtime
+   alias :hmtime :hdfs_mtime
+   module_function :hmtime
+
+ end
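All of the `h*` helpers are thin `module_function` wrappers around `WebHDFS::FileUtils`, so they can also be used outside a Rake task once the server is configured. A minimal sketch, assuming a reachable WebHDFS endpoint (hostname, user, and paths are placeholders):

```ruby
# Standalone sketch; not tied to any Rake task.
require "webhdfs/fileutils"
require "rake_hdfs"
include FileUtils

WebHDFS::FileUtils.set_server("namenode.example.com", 50070, "alice", nil)

hmkdir_p "/user/alice/tmp" unless hexist? "/user/alice/tmp"
puts hls("/user/alice")     # listing via the WebHDFS LISTSTATUS operation
puts hmtime("/user/alice")  # "modificationTime" of the path, in epoch milliseconds
```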
data/lib/rake_hdfs/version.rb ADDED
@@ -0,0 +1,3 @@
+ module RakeHDFS
+   VERSION = "0.1.1"
+ end
data/lib/rake_hdfs.rb ADDED
@@ -0,0 +1,5 @@
+ require "rake_hdfs/version"
+ require "rake_hdfs/hdfs_dsl_definition"
+ require "rake_hdfs/hdfs_file_creation_task"
+ require "rake_hdfs/hdfs_file_task"
+ require "rake_hdfs/hdfs_fileutils"
data/rake_hdfs.gemspec ADDED
@@ -0,0 +1,32 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'rake_hdfs/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = "rake_hdfs"
+   spec.version       = RakeHDFS::VERSION
+   spec.authors       = ["chenkovsky.chen"]
+   spec.email         = ["chenkovsky.chen@cootek.cn"]
+
+   spec.summary       = %q{some rake dsl for hadoop}
+   spec.description   = %q{some rake dsl for hadoop}
+   spec.homepage      = "http://github.com/chenkovsky/rake_hdfs"
+
+   # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
+   # delete this section to allow pushing this gem to any host.
+   if spec.respond_to?(:metadata)
+     spec.metadata['allowed_push_host'] = "https://rubygems.org"
+   else
+     raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
+   end
+
+   spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+   spec.bindir        = "exe"
+   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+   spec.require_paths = ["lib"]
+
+   spec.add_development_dependency "bundler", "~> 1.9"
+   spec.add_development_dependency "rake", "~> 10.0"
+   spec.add_dependency "webhdfs", "~> 0.8.0"
+ end
metadata ADDED
@@ -0,0 +1,99 @@
+ --- !ruby/object:Gem::Specification
+ name: rake_hdfs
+ version: !ruby/object:Gem::Version
+   version: 0.1.1
+ platform: ruby
+ authors:
+ - chenkovsky.chen
+ autorequire:
+ bindir: exe
+ cert_chain: []
+ date: 2016-03-31 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: bundler
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.9'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.9'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '10.0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '10.0'
+ - !ruby/object:Gem::Dependency
+   name: webhdfs
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 0.8.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 0.8.0
+ description: some rake dsl for hadoop
+ email:
+ - chenkovsky.chen@cootek.cn
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".gitignore"
+ - ".travis.yml"
+ - Gemfile
+ - README.md
+ - Rakefile
+ - lib/rake_hdfs.rb
+ - lib/rake_hdfs/hdfs_dsl_definition.rb
+ - lib/rake_hdfs/hdfs_file_creation_task.rb
+ - lib/rake_hdfs/hdfs_file_task.rb
+ - lib/rake_hdfs/hdfs_fileutils.rb
+ - lib/rake_hdfs/version.rb
+ - rake_hdfs.gemspec
+ homepage: http://github.com/chenkovsky/rake_hdfs
+ licenses: []
+ metadata:
+   allowed_push_host: https://rubygems.org
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.5.1
+ signing_key:
+ specification_version: 4
+ summary: some rake dsl for hadoop
+ test_files: []
+ has_rdoc: