RubyGems - mobilize-base - Versions diffs - 1.0.94 → 1.1.0 - Mend

mobilize-base 1.0.94 → 1.1.0

Files changed (16) hide show

data/README.md +7 -7
data/lib/mobilize-base/extensions/google_drive/file.rb +12 -6
data/lib/mobilize-base/extensions/google_drive/worksheet.rb +20 -2
data/lib/mobilize-base/extensions/hash.rb +36 -42
data/lib/mobilize-base/handlers/gdrive.rb +4 -0
data/lib/mobilize-base/handlers/gfile.rb +31 -5
data/lib/mobilize-base/handlers/gridfs.rb +3 -3
data/lib/mobilize-base/handlers/gsheet.rb +9 -8
data/lib/mobilize-base/models/dataset.rb +6 -12
data/lib/mobilize-base/models/runner.rb +8 -2
data/lib/mobilize-base/models/stage.rb +47 -13
data/lib/mobilize-base/tasks.rb +4 -1
data/lib/mobilize-base/version.rb +1 -1
data/test/base_job_rows.yml +6 -7
data/test/mobilize-base_test.rb +3 -3
metadata +4 -4

data/README.md CHANGED Viewed

@@ -501,7 +501,7 @@ name>))` and enter values under each header:
 * status	Mobilize writes this field with the last status returned by the job
 * stage1..stage5 List of stages to be performed by the job.
-  * Stages have this syntax: <handler>.<call> <params>.
+  * Stages have this syntax: `<handler>.<call> <params>`.
     * handler specifies the file that should receive the stage
     * the call specifies the method within the file. The method should
 be called `"<handler>.<call>_by_stage_path"`
@@ -514,14 +514,14 @@ curly braces).
       * gsheet.read `source: <input_gsheet_full_path>`, which reads the sheet.
         * The gsheet_full_path should be of the form `<gbook_name>/<gsheet_name>`. The test uses
 "Requestor_mobilize(test)/base1_stage1.in".
-      * gsheet.write `source: <stage_relative_path>`,`target: <target_gsheet_path>`,
+      * gsheet.write `source: <stage_name>`,`target: <target_gsheet_path>`,
 which writes the specified stage output to the target_gsheet.
-        * The stage_relative_path should be of the form `<stage_column>` or
-`<job_name/stage_column>`. The test uses "base1/stage1" for the first test
-and simply "stage1" for the second test. Both of these take the output
-from the first stage.
+        * The stage_name should be of the form `<stage_column>`; a gsheet path of the form `<gbook_name>/<gsheet_name>` is also accepted as the source. The test uses "stage1" for the first test
+and "Runner_mobilize(test)/base1.out" for the second test. The first
+takes the output from the first stage and the second reads it straight
+from the referenced sheet.
         * The test uses "Requestor_mobilize(test)/base1.out" and
-"Requestor_mobilize(test)/base2.out" for target sheets.
+"Runner_mobilize(test)/base2.out" for target sheets.
 <a name='section_Start_Run_Test'></a>
 ### Run Test

data/lib/mobilize-base/extensions/google_drive/file.rb CHANGED Viewed

@@ -40,6 +40,16 @@ module GoogleDrive
       end
     end
+    #returns the result of download_to_string, provided that
+    #<user>@<Gdrive.domain> has a reader, writer or owner acl entry
+    #on this file; raises otherwise
+    def read(user)
+      email = "#{user}@#{Mobilize::Gdrive.domain}"
+      acl = acl_entry(email)
+      readable = acl && ['reader','writer','owner'].include?(acl.role)
+      raise "User #{user} is not allowed to read #{title}" unless readable
+      download_to_string
+    end
     def update_acl(email,role="writer")
       f = self
       #need these flags for HTTP retries
@@ -70,15 +80,11 @@ module GoogleDrive
       f = self
       f.acls.select{|a| ['group','user'].include?(a.scope_type) and a.scope == email}.first
     end
     def entry_hash
       f = self
       dfe_xml = f.document_feed_entry.to_xml
-      begin
-        Hash.from_xml(dfe_xml)[:entry]
-      rescue
-        {}
-      end
+      result = Nokogiri::XML(dfe_xml)
+      { result.root.name.to_sym => Hash.xml_node_to_hash(result.root)}[:entry]
     end
   end
 end

data/lib/mobilize-base/extensions/google_drive/worksheet.rb CHANGED Viewed

@@ -44,11 +44,15 @@ module GoogleDrive
       sheet.save
     end
-    def merge(merge_sheet)
+    def merge(merge_sheet,user)
       #write the top left of sheet
       #with the contents of merge_sheet
       sheet = self
       sheet.reload
+      entry = merge_sheet.spreadsheet.acl_entry("#{user}@#{Mobilize::Gdrive.domain}")
+      unless entry and ['writer','owner'].include?(entry.role)
+        raise "User #{user} is not allowed to write to #{merge_sheet.spreadsheet.title}"
+      end
       merge_sheet.reload
       curr_rows = sheet.num_rows
       curr_cols = sheet.num_cols
@@ -77,8 +81,22 @@ module GoogleDrive
       sheet.save
     end
-    def write(tsv)
+    #returns sheet.to_tsv, provided that <user>@<Gdrive.domain> has a
+    #reader, writer or owner acl entry on the parent spreadsheet;
+    #raises otherwise
+    def read(user)
       sheet = self
+      book = sheet.spreadsheet
+      acl = book.acl_entry("#{user}@#{Mobilize::Gdrive.domain}")
+      readable = acl && ['reader','writer','owner'].include?(acl.role)
+      raise "User #{user} is not allowed to read #{book.title}" unless readable
+      sheet.to_tsv
+    end
+    def write(tsv,user)
+      sheet = self
+      entry = sheet.spreadsheet.acl_entry("#{user}@#{Mobilize::Gdrive.domain}")
+      unless entry and ['writer','owner'].include?(entry.role)
+        raise "User #{user} is not allowed to write to #{sheet.spreadsheet.title}"
+      end
       tsvrows = tsv.split("\n")
       #no rows, no write
       return true if tsvrows.length==0

data/lib/mobilize-base/extensions/hash.rb CHANGED Viewed

@@ -29,55 +29,49 @@ class Hash
     return self
   end
   # BEGIN methods to create hash from XML
-  class << self
-    def from_xml(xml_io)
-      begin
-        result = Nokogiri::XML(xml_io)
-        return { result.root.name.to_sym => xml_node_to_hash(result.root)}
-      rescue Exception => e
-        # raise your custom exception here
-      end
-    end
-    def xml_node_to_hash(node)
-      # If we are at the root of the document, start the hash
-      if node.element?
-        result_hash = {}
-        if node.attributes != {}
-          result_hash[:attributes] = {}
-          node.attributes.keys.each do |key|
-            result_hash[:attributes][node.attributes[key].name.to_sym] = prepare(node.attributes[key].value)
-          end
+  #parses xml_io with Nokogiri and returns a single-key hash:
+  #the root element's name (as a symbol) mapped to the root node
+  #converted by Hash.xml_node_to_hash
+  def Hash.from_xml(xml_io)
+    root = Nokogiri::XML(xml_io).root
+    { root.name.to_sym => Hash.xml_node_to_hash(root) }
+  end
+  def Hash.xml_node_to_hash(node)
+    # If we are at the root of the document, start the hash
+    if node.element?
+      result_hash = {}
+      if node.attributes != {}
+        result_hash[:attributes] = {}
+        node.attributes.keys.each do |key|
+          result_hash[:attributes][node.attributes[key].name.to_sym] = prepare(node.attributes[key].value)
         end
-        if node.children.size > 0
-          node.children.each do |child|
-            result = xml_node_to_hash(child)
+      end
+      if node.children.size > 0
+        node.children.each do |child|
+          result = xml_node_to_hash(child)
-            if child.name == "text"
-              unless child.next_sibling || child.previous_sibling
-                return prepare(result)
-              end
-            elsif result_hash[child.name.to_sym]
-              if result_hash[child.name.to_sym].is_a?(Object::Array)
-                result_hash[child.name.to_sym] << prepare(result)
-              else
-                result_hash[child.name.to_sym] = [result_hash[child.name.to_sym]] << prepare(result)
-              end
-            else
-              result_hash[child.name.to_sym] = prepare(result)
+          if child.name == "text"
+            unless child.next_sibling || child.previous_sibling
+              return prepare(result)
             end
+          elsif result_hash[child.name.to_sym]
+            if result_hash[child.name.to_sym].is_a?(Object::Array)
+              result_hash[child.name.to_sym] << prepare(result)
+            else
+              result_hash[child.name.to_sym] = [result_hash[child.name.to_sym]] << prepare(result)
+            end
+          else
+            result_hash[child.name.to_sym] = prepare(result)
           end
+        end
-          return result_hash
-        else
-          return result_hash
-        end
+        return result_hash
       else
-        return prepare(node.content.to_s)
+        return result_hash
       end
-    end
-    def prepare(data)
-      (data.class == String && data.to_i.to_s == data) ? data.to_i : data
-    end
+    else
+      return prepare(node.content.to_s)
+    end
+  end
+  #turns a String that round-trips through to_i (e.g. "42") into
+  #an Integer; any other value is returned unchanged
+  def Hash.prepare(data)
+    return data unless data.class == String
+    as_int = data.to_i
+    as_int.to_s == data ? as_int : data
   end
   def to_struct(struct_name)
     Struct.new(struct_name,*keys).new(*values)

data/lib/mobilize-base/handlers/gdrive.rb CHANGED Viewed

@@ -12,6 +12,10 @@ module Mobilize
       [Gdrive.config['owner']['name'],Gdrive.domain].join("@")
     end
+    #name of the owner account, read from the 'owner' section
+    #of the Gdrive config
+    def Gdrive.owner_name
+      owner_config = Gdrive.config['owner']
+      owner_config['name']
+    end
     def Gdrive.password(email)
       if email == Gdrive.owner_email
         Gdrive.config['owner']['pw']

data/lib/mobilize-base/handlers/gfile.rb CHANGED Viewed

@@ -18,10 +18,6 @@ module Mobilize
       file.update_acl(gdrive_slot,role)
     end
-    def Gfile.find_by_path(path,gdrive_slot)
-      Gdrive.files(gdrive_slot,{"title"=>path,"title-exact"=>"true"}).first
-    end
     def Gfile.read_by_stage_path(stage_path)
       #reserve gdrive_slot account for read
       gdrive_slot = Gdrive.slot_worker_by_path(s.path)
@@ -31,7 +27,37 @@ module Mobilize
       out_tsv = Gfile.find_by_path(gfile_path,gdrive_slot).read
       #use Gridfs to cache result
       out_url = "gridfs://#{s.path}/out"
-      Dataset.write_to_url(out_url,out_tsv)
+      Dataset.write_by_url(out_url,out_tsv,s.job.runner.user.name)
+    end
+    #finds the gdrive file titled exactly <path>, using the owner account.
+    #when several files share the title, the most recently published one
+    #is kept and the others are deleted.
+    #also finds or creates the matching 'gfile' Dataset, refreshes its
+    #http_url and adds the admin acl to the file.
+    #raises if no file with that title is found
+    def Gfile.find_by_path(path)
+      #file must be owned by owner
+      gdrive_slot = Gdrive.owner_email
+      files = Gdrive.files(gdrive_slot,{"title"=>path,"title-exact"=>"true"})
+      dst = Dataset.find_or_create_by_handler_and_path('gfile',path)
+      #there should only be one file with each path, otherwise we have fail
+      file = nil
+      if files.length>1
+        #keep most recent file, delete the rest;
+        #files without a published entry are treated as newest
+        newest_first = files.sort_by do |f|
+          (f.entry_hash[:published] || Time.now).to_time
+        end.reverse
+        newest_first.each_with_index do |f,f_i|
+          if f_i == 0
+            file = f
+          else
+            #delete the old file
+            f.delete
+            ("Deleted duplicate file #{path}").oputs
+          end
+        end
+      else
+        file = files.first
+      end
+      #fail with a clear message rather than a NoMethodError on nil below
+      raise "Could not find gfile #{path}" unless file
+      #always make sure dataset http URL is up to date
+      #and that it has admin acl
+      dst.update_attributes(:http_url=>file.human_url)
+      file.add_admin_acl
+      return file
     end
   end
 end

data/lib/mobilize-base/handlers/gridfs.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module Mobilize
       return ::Mongo::GridFileSystem.new(::Mongo::Connection.new(host,port).db(database_name))
     end
-    def Gridfs.read_by_dataset_path(dst_path)
+    def Gridfs.read_by_dataset_path(dst_path,user)
       begin
         zs=Gridfs.grid.open(dst_path,'r').read
         return ::Zlib::Inflate.inflate(zs)
@@ -20,10 +20,10 @@ module Mobilize
       end
     end
-    def Gridfs.write_by_dataset_path(dst_path,string)
+    def Gridfs.write_by_dataset_path(dst_path,string,user)
       zs = ::Zlib::Deflate.deflate(string)
       raise "compressed string too large for Gridfs write" if zs.length > Gridfs.config['max_compressed_write_size']
-      curr_zs = Gridfs.read_by_dataset_path(dst_path).to_s
+      curr_zs = Gridfs.read_by_dataset_path(dst_path,user).to_s
       #write a new version when there is a change
       if curr_zs != zs
         Gridfs.grid.open(dst_path,'w',:versions => Gridfs.config['max_versions']){|f| f.write(zs)}

data/lib/mobilize-base/handlers/gsheet.rb CHANGED Viewed

@@ -37,11 +37,12 @@ module Mobilize
       gdrive_slot = Gdrive.slot_worker_by_path(stage_path)
       return false unless gdrive_slot
       s = Stage.where(:path=>stage_path).first
+      user = s.job.runner.user.name
       gsheet_path = s.params['source']
-      out_tsv = Gsheet.find_by_path(gsheet_path,gdrive_slot).to_tsv
+      out_tsv = Gsheet.find_by_path(gsheet_path,gdrive_slot).read(user)
       #use Gridfs to cache result
       out_url = "gridfs://#{s.path}/out"
-      Dataset.write_to_url(out_url,out_tsv)
+      Dataset.write_by_url(out_url,out_tsv,Gdrive.owner_name)
     end
     def Gsheet.write_by_stage_path(stage_path)
@@ -49,24 +50,24 @@ module Mobilize
       #return blank response if there are no slots available
       return nil unless gdrive_slot
       s = Stage.where(:path=>stage_path).first
-      source_path = s.params['source']
+      user = s.job.runner.user.name
       target_path = s.params['target']
-      source_dst = s.source_dst(source_path)
-      tsv = source_dst.read
+      source_dst = s.source_dsts(gdrive_slot).first
+      tsv = source_dst.read(user)
       sheet_name = target_path.split("/").last
       temp_path = [stage_path.gridsafe,sheet_name].join("/")
       temp_sheet = Gsheet.find_or_create_by_path(temp_path,gdrive_slot)
-      temp_sheet.write(tsv)
+      temp_sheet.write(tsv,Gdrive.owner_name)
       temp_sheet.check_and_fix(tsv)
       target_sheet = Gsheet.find_or_create_by_path(target_path,gdrive_slot)
-      target_sheet.merge(temp_sheet)
+      target_sheet.merge(temp_sheet,user)
       #delete the temp sheet's book
       temp_sheet.spreadsheet.delete
       status = "Write successful for #{target_path}"
       s.update_status(status)
       #use Gridfs to cache result
       out_url = "gridfs://#{s.path}/out"
-      Dataset.write_to_url(out_url,status)
+      Dataset.write_by_url(out_url,status,Gdrive.owner_name)
     end
   end
 end

data/lib/mobilize-base/models/dataset.rb CHANGED Viewed

@@ -38,30 +38,24 @@ module Mobilize
       return dst
     end
-    def Dataset.write_to_url(url,string)
+    def Dataset.write_by_url(url,string,user)
       dst = Dataset.find_or_create_by_url(url)
-      dst.write(string)
+      dst.write(string,user)
       url
     end
-    def read
+    def read(user)
       dst = self
       dst.update_attributes(:last_read_at=>Time.now.utc)
-      "Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path)
+      "Mobilize::#{dst.handler.humanize}".constantize.read_by_dataset_path(dst.path,user)
     end
-    def write(string)
+    def write(string,user)
       dst = self
-      "Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string)
+      "Mobilize::#{dst.handler.humanize}".constantize.write_by_dataset_path(dst.path,string,user)
       dst.raw_size = string.length
       dst.save!
       return true
     end
-    def delete
-      dst = self
-      "Mobilize::#{dst.handler.humanize}".constantize.delete_by_dataset_path(dst.path)
-      return true
-    end
   end
 end

data/lib/mobilize-base/models/runner.rb CHANGED Viewed

@@ -72,6 +72,12 @@ module Mobilize
       Dataset.find_or_create_by_url("gridfs://#{r.path}")
     end
+    #first Gbook found under the book title, which is the leading
+    #"/"-separated segment of this runner's path
+    def gbook(gdrive_slot)
+      book_title = path.split("/")[0]
+      matches = Gbook.find_all_by_path(book_title,gdrive_slot)
+      matches.first
+    end
     def gsheet(gdrive_slot)
       r = self
       jobs_sheet = Gsheet.find_or_create_by_path(r.path,gdrive_slot)
@@ -82,9 +88,9 @@ module Mobilize
     def read_gsheet(gdrive_slot)
       r = self
-      gsheet_tsv = r.gsheet(gdrive_slot).to_tsv
+      gsheet_tsv = r.gsheet(gdrive_slot).read(Gdrive.owner_name)
       #cache in DB
-      r.cache.write(gsheet_tsv)
+      r.cache.write(gsheet_tsv,Gdrive.owner_name)
       #turn it into a hash array
       gsheet_jobs = gsheet_tsv.tsv_to_hash_array
       #go through each job, update relevant job with its params

data/lib/mobilize-base/models/stage.rb CHANGED Viewed

@@ -68,19 +68,6 @@ module Mobilize
       return j.stages[s.idx]
     end
-    def source_dst(source_path)
-      #gets dataset based on path given in source parameter
-      s = self
-      source_job_name, source_stage_name = if source_path.index("/")
-                                            source_path.split("/")
-                                          else
-                                            [nil, source_path]
-                                          end
-      source_stage_path = "#{s.job.runner.path}/#{source_job_name || s.job.name}/#{source_stage_name}"
-      source_stage = Stage.where(:path=>source_stage_path).first
-      source_stage.out_dst
-    end
     def Stage.perform(id,*args)
       s = Stage.where(:path=>id).first
       j = s.job
@@ -118,6 +105,53 @@ module Mobilize
       return true
     end
+    def source_dsts(gdrive_slot)
+      #returns an array of Datasets corresponding to
+      #gridfs caches for stage outputs, gsheets and gfiles
+      #or dataset pointers for other handlers.
+      #one Dataset is returned per entry in params['sources']
+      #(or for the single params['source']):
+      # * "stage1".."stage5" => out_dst of that stage in the same job
+      # * anything containing "://" => Dataset found or created by url
+      # * anything else is read from gdrive as the job's user and
+      #   cached in gridfs under this stage's path
+      #returns [] when the sources are missing, empty or not an Array
+      s = self
+      params = s.params
+      source_paths = if params['sources']
+                       params['sources']
+                     elsif params['source']
+                       [params['source']]
+                     end
+      user = s.job.runner.user.name
+      return [] if (source_paths.class!=Array or source_paths.length==0)
+      dsts = []
+      source_paths.each do |source_path|
+        if source_path.index(/^stage[1-5]$/)
+          source_stage_path = "#{s.job.runner.path}/#{s.job.name}/#{source_path}"
+          source_stage = Stage.where(:path=>source_stage_path).first
+          dsts << source_stage.out_dst
+        elsif source_path.index("://")
+          #find or create by url
+          dsts << Dataset.find_or_create_by_url(source_path)
+        else
+          if source_path.index("/")
+            #slashes mean sheets
+            out_tsv = Gsheet.find_by_path(source_path,gdrive_slot).read(user)
+          else
+            #check sheets in runner
+            r = s.job.runner
+            #gbook takes the gdrive_slot as a required argument
+            runner_sheet = r.gbook(gdrive_slot).worksheet_by_title(source_path)
+            out_tsv = if runner_sheet
+                        runner_sheet.read(user)
+                      else
+                        #check for gfile. will fail if there isn't one.
+                        Gfile.find_by_path(source_path).read(user)
+                      end
+          end
+          #use Gridfs to cache gdrive results
+          file_name = source_path.split("/").last
+          out_url = "gridfs://#{s.path}/#{file_name}"
+          Dataset.write_by_url(out_url,out_tsv,user)
+          dsts << Dataset.find_by_url(out_url)
+        end
+      end
+      return dsts
+    end
     def enqueue!
       s = self
       ::Resque::Job.create("mobilize",Stage,s.path,{})

data/lib/mobilize-base/tasks.rb CHANGED Viewed

@@ -65,7 +65,10 @@ namespace :mobilize_base do
     resque_web_extension_path = "#{full_config_dir}resque_web.rb"
     #kill any resque-web for now
     `ps aux | grep resque-web | awk '{print $2}' | xargs kill`
-    command = "bundle exec resque-web -p #{port.to_s} #{resque_web_extension_path}"
+    resque_redis_port_args = if Mobilize::Base.env == 'test'
+                               " -r localhost:#{Mobilize::Base.config('resque')['redis_port']}"
+                             end.to_s
+    command = "bundle exec resque-web -p #{port.to_s} #{resque_web_extension_path} #{resque_redis_port_args}"
     `#{command}`
   end
   desc "create indexes for all base models in mongodb"

data/lib/mobilize-base/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Mobilize
   module Base
-    VERSION = "1.0.94"
+    VERSION = "1.1.0"
   end
 end

data/test/base_job_rows.yml CHANGED Viewed

@@ -1,13 +1,12 @@
-- name: "base1"
+- name: base1
   active: true
   trigger: once
   status: ""
-  stage1: 'gsheet.read source:"Runner_mobilize(test)/base1_stage1.in"'
-  stage2: 'gsheet.write source:"base1/stage1", target:"Runner_mobilize(test)/base1.out"'
+  stage1: gsheet.read source:"Runner_mobilize(test)/base1_stage1.in"
+  stage2: gsheet.write source:"stage1", target:"Runner_mobilize(test)/base1.out"
-- name: "base2"
+- name: base2
   active: true
-  trigger: "after base1"
+  trigger: after base1
   status: ""
-  stage1: 'gsheet.read source:"Runner_mobilize(test)/base1.out"'
-  stage2: 'gsheet.write source:"stage1", target:"Runner_mobilize(test)/base2.out"'
+  stage1: gsheet.write source:"Runner_mobilize(test)/base1.out", target:"Runner_mobilize(test)/base2.out"

data/test/mobilize-base_test.rb CHANGED Viewed

@@ -31,7 +31,7 @@ describe "Mobilize" do
     puts "Jobtracker created runner with 'jobs' sheet?"
     r = u.runner
     jobs_sheet = r.gsheet(gdrive_slot)
-    tsv = jobs_sheet.to_tsv
+    tsv = jobs_sheet.read(Mobilize::Gdrive.owner_name)
     assert tsv.length == 61 #headers only
     puts "add base1_stage1 input sheet"
@@ -39,7 +39,7 @@ describe "Mobilize" do
     test_source_ha = ::YAML.load_file("#{Mobilize::Base.root}/test/base1_stage1.yml")*40
     test_source_tsv = test_source_ha.hash_array_to_tsv
-    test_source_sheet.write(test_source_tsv)
+    test_source_sheet.write(test_source_tsv,Mobilize::Gdrive.owner_name)
     puts "add row to jobs sheet, wait 150s"
     test_job_rows = ::YAML.load_file("#{Mobilize::Base.root}/test/base_job_rows.yml")
@@ -57,7 +57,7 @@ describe "Mobilize" do
     jobs_sheet.add_or_update_rows([{'name'=>'base1','active'=>true}])
     sleep 120
     test_target_sheet_2 = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/base1.out",gdrive_slot)
     puts "jobtracker posted test sheet data to test destination, and checksum succeeded?"
     assert test_target_sheet_2.to_tsv == test_source_sheet.to_tsv

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: mobilize-base
 version: !ruby/object:Gem::Version
-  version: 1.0.94
+  version: 1.1.0
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-01-08 00:00:00.000000000 Z
+date: 2013-01-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -238,7 +238,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -2093825275385186120
+      hash: -781501523970053172
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -247,7 +247,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -2093825275385186120
+      hash: -781501523970053172
 requirements: []
 rubyforge_project: mobilize-base
 rubygems_version: 1.8.24