rake_hdfs 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1310a15bf3d66d81d1b6b51ff81c72222369100d
4
+ data.tar.gz: b59b9a58c2baac58adf6a0476607c4252b5a3b71
5
+ SHA512:
6
+ metadata.gz: 335720d67d9b386cf63945bb36819b745be82ab1c8d9e6f1c3e96c85f36c93d9b0b1772bbac70a6fadc500bb0487fe8df5d05860a6360a38dc904f00b6f8bd76
7
+ data.tar.gz: 75311d15ac8fe3d460f3644ff32623ccb8242ec46764ae61c98bd206f82f90f224a08068227aa79f8de05aa3b6be13f6de721180d8fc615cced466b2146d64b0
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.3.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rake_hdfs.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,71 @@
1
+ # RakeHdfs
2
+
3
+ This gem makes Rake tasks operate on the HDFS file system. It is based on webhdfs.
4
+
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ ```ruby
11
+ gem 'rake_hdfs'
12
+ ```
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install rake_hdfs
21
+
22
+ ## Usage
23
+
24
+ ```ruby
25
+ require "webhdfs/fileutils"
26
+ require "rake_hdfs"
27
+ $dir = "/user/chenkovsky.chen"
28
+
29
+ WebHDFS::FileUtils.set_server("localhost", 50070, "chenkovsky", nil)
30
+
31
+ hdirectory "#{$dir}/tmp_dir"
32
+ desc "test hdfs rake"
33
+ hfile "#{$dir}/tmp_dir/tmp.txt" => ["#{$dir}/tmp_dir"] do
34
+ raise "tmp file should not exist." if hexist? "#{$dir}/tmp_dir/tmp.txt"
35
+ files = hls $dir
36
+ puts files
37
+ dir_mtime = hmtime $dir
38
+ puts dir_mtime
39
+ hcopy_from_local "tmp.txt", "#{$dir}/tmp_dir/tmp.txt"
40
+ hcopy_from_local_via_stream "tmp.txt", "#{$dir}/tmp_dir/tmp2.txt"
41
+
42
+ hcopy_to_local "#{$dir}/tmp_dir/tmp.txt", "tmp3.txt"
43
+
44
+ happend("#{$dir}/tmp_dir/tmp2.txt", "hahaha")
45
+
46
+ hmkdir "#{$dir}/tmp2_dir"
47
+
48
+ raise "tmp2_dir should not exist." if not hexist? "#{$dir}/tmp2_dir"
49
+
50
+ hrm "#{$dir}/tmp2_dir"
51
+ hmkdir "#{$dir}/tmp3_dir"
52
+ hcopy_from_local "tmp.txt", "#{$dir}/tmp3_dir/tmp.txt"
53
+
54
+ hrmr "#{$dir}/tmp3_dir"
55
+
56
+ hrename "#{$dir}/tmp_dir/tmp2.txt", "#{$dir}/tmp_dir/tmp4.txt"
57
+
58
+ hchmod 0755, "#{$dir}/tmp_dir/tmp4.txt"
59
+
60
+ puts (hstat "#{$dir}/tmp_dir/tmp4.txt")
61
+ raise "not correct uptodate" unless huptodate? "#{$dir}/tmp_dir/tmp4.txt", ["#{$dir}/tmp_dir/tmp.txt"]
62
+ end
63
+ ```
64
+
65
+ ## Contributing
66
+
67
+ 1. Fork it ( https://github.com/chenkovsky/rake_hdfs/fork )
68
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
69
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
70
+ 4. Push to the branch (`git push origin my-new-feature`)
71
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ gem "webhdfs"
@@ -0,0 +1,24 @@
1
+ require 'rake/file_utils_ext'
2
+ module Rake
3
+ module DSL
4
+ def hfile(*args, &block) # :doc:
5
+ Rake::HDFSFileTask.define_task(*args, &block)
6
+ end
7
+
8
+ def hfile_create(*args, &block)
9
+ Rake::HDFSFileCreationTask.define_task(*args, &block)
10
+ end
11
+
12
+ def hdirectory(*args, &block) # :doc:
13
+ result = hfile_create(*args, &block)
14
+ dir, _ = *Rake.application.resolve_args(args)
15
+ dir = Rake.from_pathname(dir)
16
+ Rake.each_dir_parent(dir) do |d|
17
+ hfile_create d do |t|
18
+ hmkdir_p t.name unless WebHDFS::FileUtils.exist?(t.name)
19
+ end
20
+ end
21
+ result
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,23 @@
1
+ require 'rake/file_task'
2
+ require 'rake/early_time'
3
+
4
+ module Rake
5
+
6
+ # A FileCreationTask is a file task that when used as a dependency will be
7
+ # needed if and only if the file has not been created. Once created, it is
8
+ # not re-triggered if any of its dependencies are newer, nor does trigger
9
+ # any rebuilds of tasks that depend on it whenever it is updated.
10
+ #
11
+ class HDFSFileCreationTask < FileTask
12
+ # Is this file task needed? Yes if it doesn't exist.
13
+ def needed?
14
+ ! WebHDFS::FileUtils.exist?(name)
15
+ end
16
+
17
+ # Time stamp for file creation task. This time stamp is earlier
18
+ # than any other time stamp.
19
+ def timestamp
20
+ Rake::EARLY
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,46 @@
1
+ require 'rake/task.rb'
2
+ require 'rake/early_time'
3
+
4
+ module Rake
5
+
6
+ # A FileTask is a task that includes time based dependencies. If any of a
7
+ # FileTask's prerequisites have a timestamp that is later than the file
8
+ # represented by this task, then the file must be rebuilt (using the
9
+ # supplied actions).
10
+ #
11
+ class HDFSFileTask < Task
12
+
13
+ # Is this file task needed? Yes if it doesn't exist, or if its time stamp
14
+ # is out of date.
15
+ def needed?
16
+ ! WebHDFS::FileUtils.exist?(name) || out_of_date?(timestamp) || @application.options.build_all
17
+ end
18
+
19
+ # Time stamp for file task.
20
+ def timestamp
21
+ if WebHDFS::FileUtils.exist?(name)
22
+ WebHDFS::FileUtils.mtime(name.to_s)
23
+ else
24
+ Rake::LATE
25
+ end
26
+ end
27
+
28
+ private
29
+
30
+ # Are there any prerequisites with a later time than the given time stamp?
31
+ def out_of_date?(stamp)
32
+ @prerequisites.any? { |n| application[n, @scope].timestamp > stamp }
33
+ end
34
+
35
+ # ----------------------------------------------------------------
36
+ # Task class methods.
37
+ #
38
+ class << self
39
+ # Apply the scope to the task name according to the rules for this kind
40
+ # of task. File based tasks ignore the scope when creating the name.
41
+ def scope_name(scope, task_name)
42
+ Rake.from_pathname(task_name)
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,203 @@
1
+ require "webhdfs/fileutils"
2
+ module WebHDFS
3
+ module FileUtils
4
+ def exist?(file, options={})
5
+ begin
6
+ client.stat(file, options)
7
+ return true
8
+ rescue FileNotFoundError => e
9
+ return false
10
+ end
11
+ end
12
+ module_function :exist?
13
+ def ls(path, options={})
14
+ opts = options.dup
15
+ fu_log "ls #{path}" if opts.delete(:verbose)
16
+ client.list(path, options)
17
+ end
18
+ module_function :ls
19
+
20
+ def stat(path, options={})
21
+ opts = options.dup
22
+ fu_log "stat #{path}" if opts.delete(:verbose)
23
+ client.stat(path, options)
24
+ end
25
+ module_function :stat
26
+
27
+ def uptodate?(new, old_list, options = {})
28
+ return false unless exist?(new)
29
+ new_time = mtime(new)
30
+ old_list.each do |old|
31
+ if exist?(old)
32
+ return false unless new_time > mtime(old)
33
+ end
34
+ end
35
+ true
36
+ end
37
+ module_function :uptodate?
38
+
39
+ def mtime(path, options = {})
40
+ file_stat = stat(path, options)
41
+ file_stat["modificationTime"]
42
+ end
43
+ module_function :mtime
44
+ end
45
+ end
46
+
47
+ module FileUtils
48
+ def hdfs_copy_from_local(file, path, options={})
49
+ WebHDFS::FileUtils.copy_from_local(file, path, options)
50
+ end
51
+ module_function :hdfs_copy_from_local
52
+
53
+ alias :hcopy_from_local :hdfs_copy_from_local
54
+ module_function :hcopy_from_local
55
+
56
+ def hdfs_copy_from_local_via_stream(file, path, options={})
57
+ WebHDFS::FileUtils.copy_from_local_via_stream(file, path, options)
58
+ end
59
+ module_function :hdfs_copy_from_local_via_stream
60
+ alias :hcopy_from_local_via_stream :hdfs_copy_from_local_via_stream
61
+ module_function :hcopy_from_local_via_stream
62
+
63
+ def hdfs_copy_to_local(path, file, options={})
64
+ WebHDFS::FileUtils.copy_to_local(path, file, options)
65
+ end
66
+
67
+ module_function :hdfs_copy_to_local
68
+
69
+ alias :hcopy_to_local :hdfs_copy_to_local
70
+ module_function :hcopy_to_local
71
+
72
+ def hdfs_append(path, body, options={})
73
+ WebHDFS::FileUtils.append(path,body,options)
74
+ end
75
+ module_function :hdfs_append
76
+
77
+ alias :happend :hdfs_append
78
+ module_function :happend
79
+
80
+ def hdfs_mkdir(list, options={})
81
+ WebHDFS::FileUtils.mkdir(list, options)
82
+ end
83
+
84
+ module_function :hdfs_mkdir
85
+
86
+ alias :hmkdir :hdfs_mkdir
87
+ module_function :hmkdir
88
+
89
+ alias :hdfs_mkdir_p :hdfs_mkdir
90
+ module_function :hdfs_mkdir_p
91
+
92
+ alias :hmkdir_p :hdfs_mkdir
93
+ module_function :hmkdir_p
94
+
95
+ def hdfs_rm(list, options={})
96
+ WebHDFS::FileUtils.rm(list, options)
97
+ end
98
+ module_function :hdfs_rm
99
+ alias :hrm :hdfs_rm
100
+ module_function :hrm
101
+
102
+ def hdfs_rmr(list, options={})
103
+ WebHDFS::FileUtils.rmr(list, options)
104
+ end
105
+ module_function :hdfs_rmr
106
+
107
+ alias :hrmr :hdfs_rmr
108
+ module_function :hrmr
109
+
110
+ def hdfs_rename(src, dst, options={})
111
+ WebHDFS::FileUtils.rename(src, dst, options)
112
+ end
113
+
114
+ module_function :hdfs_rename
115
+
116
+ alias :hrename :hdfs_rename
117
+ module_function :hrename
118
+
119
+ def hdfs_chmod(mode, list, options={})
120
+ WebHDFS::FileUtils.chmod(mode, list, options)
121
+ end
122
+
123
+ module_function :hdfs_chmod
124
+
125
+ alias :hchmod :hdfs_chmod
126
+ module_function :hchmod
127
+
128
+ def hdfs_chown(user, group, list, options={})
129
+ WebHDFS::FileUtils.chown(user, group, list, options)
130
+ end
131
+
132
+ module_function :hdfs_chown
133
+
134
+ alias :hchown :hdfs_chown
135
+ module_function :hchown
136
+
137
+ def hdfs_set_repl_factor(list, num, options={})
138
+ WebHDFS::FileUtils.set_repl_factor(list, num, options)
139
+ end
140
+ module_function :hdfs_set_repl_factor
141
+
142
+ alias :hset_repl_factor :hdfs_set_repl_factor
143
+ module_function :hset_repl_factor
144
+
145
+ def hdfs_set_atime(list, time, options={})
146
+ WebHDFS::FileUtils.set_atime(list, time, options)
147
+ end
148
+
149
+ module_function :hdfs_set_atime
150
+
151
+ alias :hset_atime :hdfs_set_atime
152
+ module_function :hset_atime
153
+
154
+ def hdfs_set_mtime(list, time, options={})
155
+ WebHDFS::FileUtils.set_mtime(list, time, options)
156
+ end
157
+
158
+ module_function :hdfs_set_mtime
159
+
160
+ alias :hset_mtime :hdfs_set_mtime
161
+ module_function :hset_mtime
162
+
163
+ def hdfs_ls(path, options={})
164
+ WebHDFS::FileUtils.ls(path, options)
165
+ end
166
+
167
+ module_function :hdfs_ls
168
+ alias :hls :hdfs_ls
169
+ module_function :hls
170
+
171
+ def hdfs_stat(path, options={})
172
+ WebHDFS::FileUtils.stat(path, options)
173
+ end
174
+
175
+ module_function :hdfs_stat
176
+ alias :hstat :hdfs_stat
177
+ module_function :hstat
178
+
179
+ def hdfs_exist?(path, options={})
180
+ WebHDFS::FileUtils.exist?(path, options)
181
+ end
182
+
183
+ module_function :hdfs_exist?
184
+ alias :hexist? :hdfs_exist?
185
+ module_function :hexist?
186
+
187
+ def hdfs_uptodate?(new, old_list, options={})
188
+ WebHDFS::FileUtils.uptodate?(new, old_list, options)
189
+ end
190
+
191
+ module_function :hdfs_uptodate?
192
+ alias :huptodate? :hdfs_uptodate?
193
+ module_function :huptodate?
194
+
195
+ def hdfs_mtime(path, options={})
196
+ WebHDFS::FileUtils.mtime(path, options)
197
+ end
198
+
199
+ module_function :hdfs_mtime
200
+ alias :hmtime :hdfs_mtime
201
+ module_function :hmtime
202
+
203
+ end
@@ -0,0 +1,3 @@
1
+ module RakeHDFS
2
+ VERSION = "0.1.1"
3
+ end
data/lib/rake_hdfs.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "rake_hdfs/version"
2
+ require "rake_hdfs/hdfs_dsl_definition"
3
+ require "rake_hdfs/hdfs_file_creation_task"
4
+ require "rake_hdfs/hdfs_file_task"
5
+ require "rake_hdfs/hdfs_fileutils"
data/rake_hdfs.gemspec ADDED
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'rake_hdfs/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "rake_hdfs"
8
+ spec.version = RakeHDFS::VERSION
9
+ spec.authors = ["chenkovsky.chen"]
10
+ spec.email = ["chenkovsky.chen@cootek.cn"]
11
+
12
+ spec.summary = %q{some rake dsl for hadoop}
13
+ spec.description = %q{some rake dsl for hadoop}
14
+ spec.homepage = "http://github.com/chenkovsky/rake_hdfs"
15
+
16
+ # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
17
+ # delete this section to allow pushing this gem to any host.
18
+ if spec.respond_to?(:metadata)
19
+ spec.metadata['allowed_push_host'] = "https://rubygems.org"
20
+ else
21
+ raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
22
+ end
23
+
24
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
+ spec.bindir = "exe"
26
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ["lib"]
28
+
29
+ spec.add_development_dependency "bundler", "~> 1.9"
30
+ spec.add_development_dependency "rake", "~> 10.0"
31
+ spec.add_dependency "webhdfs", "~> 0.8.0"
32
+ end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rake_hdfs
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - chenkovsky.chen
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-03-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: webhdfs
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.8.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.8.0
55
+ description: some rake dsl for hadoop
56
+ email:
57
+ - chenkovsky.chen@cootek.cn
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".travis.yml"
64
+ - Gemfile
65
+ - README.md
66
+ - Rakefile
67
+ - lib/rake_hdfs.rb
68
+ - lib/rake_hdfs/hdfs_dsl_definition.rb
69
+ - lib/rake_hdfs/hdfs_file_creation_task.rb
70
+ - lib/rake_hdfs/hdfs_file_task.rb
71
+ - lib/rake_hdfs/hdfs_fileutils.rb
72
+ - lib/rake_hdfs/version.rb
73
+ - rake_hdfs.gemspec
74
+ homepage: http://github.com/chenkovsky/rake_hdfs
75
+ licenses: []
76
+ metadata:
77
+ allowed_push_host: https://rubygems.org
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.5.1
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: some rake dsl for hadoop
98
+ test_files: []
99
+ has_rdoc: