crabfarm 0.2.3 → 0.2.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: effa8e1c5802af234c1fa733db6c2e1adea6ee5b
4
- data.tar.gz: b988a4ddc01841d41cafd4225942820fd8a10108
3
+ metadata.gz: fff9d93bc19a725f5fd02a5460ceafe44c3c7694
4
+ data.tar.gz: 46a109065b2018ed7a1a1bac296d0c53fba4e6d5
5
5
  SHA512:
6
- metadata.gz: 43b9f2b0a3157ca68addadc895fc77de7f0c025205095b4ecc3027630bb71a8994dc84c5b0de32f1a135f2a10c5fcfc4e9cf1220307f95e0a008ad77e4c01383
7
- data.tar.gz: 01863c91b81858759875f6d12890c9c3c2a1a846618df9dbf81c2a3632f5174018adcc2a2b13cc1bd4d10d8b0f275a0ffed900e0b970409ca5d690b6a548acb8
6
+ metadata.gz: d756345e28af5f03a2a5bb9404035c6262ca0e192606939e8d8daa5968fca2a3c6c9faab52d3a81c4bf4bcf54579a5d7d031af12c86016b9ec3461ef17edd6f6
7
+ data.tar.gz: 79f8db6736962460721927082c49d9060aebcdfdc8b8773101427a113ee1b0e08c8065cbbd7715afc72d9e70af02adf10f6bd436e3af526ef85b3655c1a84ca3
@@ -2,11 +2,19 @@ require 'nokogiri'
2
2
 
3
3
  module Crabfarm
4
4
  class NokogiriAdapter
5
- def self.parse(_element)
6
- if _element.respond_to? :to_html
7
- Nokogiri::HTML _element.to_html
5
+ def self.format
6
+ 'html'
7
+ end
8
+
9
+ def self.parse(_raw)
10
+ Nokogiri::HTML _raw
11
+ end
12
+
13
+ def self.preprocess_parsing_target(_target)
14
+ if _target.respond_to? :to_html
15
+ _target.to_html
8
16
  else
9
- Nokogiri::HTML _element
17
+ _target
10
18
  end
11
19
  end
12
20
  end
@@ -2,8 +2,16 @@ require "pdf-reader"
2
2
 
3
3
  module Crabfarm
4
4
  class PdfReaderAdapter
5
- def self.parse(_pdf_data)
6
- PDF::Reader.new StringIO.new _pdf_data
5
+ def self.format
6
+ 'pdf'
7
+ end
8
+
9
+ def self.parse(_raw)
10
+ PDF::Reader.new StringIO.new _raw
11
+ end
12
+
13
+ def self.preprocess_parsing_target(_target)
14
+ _target
7
15
  end
8
16
  end
9
17
  end
@@ -6,13 +6,26 @@ module Crabfarm
6
6
 
7
7
  attr_reader :params, :document
8
8
 
9
- def self.engine(_engine)
10
- @engine = _engine
9
+ def self.parser_engine(_engine=nil)
10
+ @engine_name = _engine
11
+ end
12
+
13
+ def self.engine
14
+ @engine ||= Strategies.load(:parser_engine, @engine_name || Crabfarm.config.parser_engine)
15
+ end
16
+
17
+ def self.snapshot_path(_name=nil)
18
+ _name = self.to_s.underscore if _name.nil?
19
+ File.join(GlobalState.snapshots_path, _name + '.' + engine.format)
20
+ end
21
+
22
+ def engine
23
+ self.class.engine
11
24
  end
12
25
 
13
26
  def initialize(_target, _params)
14
- engine_class = Strategies.load(:parser_engine, class_engine || Crabfarm.config.parser_engine)
15
- @document = engine_class.parse _target
27
+ @parsed_data = engine.preprocess_parsing_target _target
28
+ @document = engine.parse @parsed_data
16
29
  @params = _params
17
30
 
18
31
  super @document
@@ -22,6 +35,18 @@ module Crabfarm
22
35
  raise NotImplementedError.new
23
36
  end
24
37
 
38
+ def take_snapshot(_name=nil)
39
+ file_path = self.class.snapshot_path _name
40
+
41
+ raise ArgumentError.new "Snapshot already exists '#{file_path}', make sure to implement the #{self.class.to_s} parse method." if File.exist? file_path
42
+
43
+ dir_path = file_path.split(File::SEPARATOR)[0...-1]
44
+ FileUtils.mkpath dir_path.join(File::SEPARATOR) if dir_path.length > 0
45
+
46
+ File.write file_path, @parsed_data
47
+ nil
48
+ end
49
+
25
50
  def __getobj__
26
51
  @document
27
52
  end
@@ -30,10 +55,5 @@ module Crabfarm
30
55
  @document = obj
31
56
  end
32
57
 
33
- private
34
-
35
- def class_engine
36
- self.class.instance_variable_get :@engine
37
- end
38
58
  end
39
59
  end
@@ -115,7 +115,7 @@ module Crabfarm
115
115
  next puts "This command can only be ran inside a crabfarm application" unless GlobalState.inside_crawler_app?
116
116
 
117
117
  require "crabfarm/modes/recorder"
118
- Crabfarm::Modes::Recorder.start GlobalState.memento_path(args[0]), options[:playback]
118
+ Crabfarm::Modes::Recorder.start args[0], options[:playback]
119
119
  end
120
120
  end
121
121
 
@@ -21,7 +21,8 @@ module Crabfarm
21
21
 
22
22
  def load_crabtrap_context(_memento)
23
23
  require 'crabfarm/crabtrap_context'
24
- m_path = GlobalState.memento_path _memento
24
+ require 'crabfarm/modes/recorder'
25
+ m_path = Modes::Recorder.memento_path _memento
25
26
  raise ResourceNotFoundError.new "Could not find memento '#{_name}'" unless File.exists? m_path
26
27
  Crabfarm::CrabtrapContext.new :replay, m_path
27
28
  end
@@ -9,14 +9,12 @@ module Crabfarm
9
9
  CF_PATH
10
10
  end
11
11
 
12
- def memento_path(_name)
13
- return nil if _name.nil?
14
- File.join(app_path, 'spec/mementos', _name + '.json.gz')
12
+ def mementos_path
13
+ File.join(app_path, 'spec/mementos')
15
14
  end
16
15
 
17
- def snapshot_path(_file_name)
18
- return nil if _file_name.nil?
19
- File.join(app_path, 'spec/snapshots', _file_name)
16
+ def snapshots_path
17
+ File.join app_path, 'spec/snapshots'
20
18
  end
21
19
 
22
20
  extend self
@@ -6,14 +6,20 @@ module Crabfarm
6
6
  module Modes
7
7
  module Recorder
8
8
 
9
+ def self.memento_path(_name)
10
+ File.join(GlobalState.mementos_path, _name + '.json.gz')
11
+ end
12
+
9
13
  def self.start(_target, _replay=false)
10
14
  return puts "Must provide a recording target" unless _target.is_a? String
11
- return puts "Memento file does not exist: #{_target}" if _replay and not File.exist? _target
15
+
16
+ target_path = memento_path _target
17
+ return puts "Memento file does not exist: #{target_path}" if _replay and not File.exist? target_path
12
18
 
13
19
  crabtrap_config = Crabfarm.config.crabtrap_config
14
20
  crabtrap_config[:mode] = _replay ? :replay : :capture
15
21
  crabtrap_config[:port] = Utils::PortDiscovery.find_available_port
16
- crabtrap_config[:bucket_path] = _target
22
+ crabtrap_config[:bucket_path] = target_path
17
23
 
18
24
  crabtrap = CrabtrapRunner.new crabtrap_config
19
25
  crabtrap.start
@@ -3,12 +3,21 @@ module Crabfarm
3
3
 
4
4
  class Error < Crabfarm::Error; end
5
5
 
6
- def parse(_snapshot, _options={})
7
- snapshot_path = GlobalState.snapshot_path _snapshot
6
+ def parse(_snapshot=nil, _options={})
7
+
8
+ raise Error.new "Crawl is only available in parser specs" unless described_class < Crabfarm::BaseParser
9
+
10
+ if _snapshot.is_a? Hash
11
+ raise ArgumentException.new 'Invalid arguments' unless _options.nil?
12
+ _options = _snapshot
13
+ _snapshot = nil
14
+ end
15
+
16
+ snapshot_path = described_class.snapshot_path _snapshot
8
17
  raise Error.new "Snapshot does not exist #{_snapshot}" unless File.exist? snapshot_path
9
18
 
10
- html = File.read snapshot_path
11
- parser = described_class.new html, _options
19
+ data = File.read snapshot_path
20
+ parser = described_class.new data, _options
12
21
  parser.parse
13
22
  parser
14
23
  end
@@ -39,7 +48,7 @@ module Crabfarm
39
48
  end
40
49
 
41
50
  def parser
42
- @parser
51
+ @parser ||= parse
43
52
  end
44
53
 
45
54
  def driver(_session_id=nil)
@@ -54,8 +63,8 @@ RSpec.configure do |config|
54
63
 
55
64
  config.around(:example) do |example|
56
65
  if described_class < Crabfarm::BaseParser
57
- if example.metadata[:parsing]
58
- @parser = parse example.metadata[:parsing], example.metadata[:using] || {}
66
+ if example.metadata[:parsing] || example[:parsing_with_params]
67
+ @parser = parse example.metadata[:parsing], example.metadata[:parsing_with_params] || {}
59
68
  end
60
69
  example.run
61
70
  elsif described_class < Crabfarm::BaseState
@@ -1,7 +1,9 @@
1
1
  class <%= parser_class %> < Crabfarm::BaseParser
2
2
 
3
3
  def parse
4
- # Do some damage!
4
+ # You can replace the following line after running the owner state specs once.
5
+ # Take a look at the 'Testing' section of the README.md for more information!
6
+ take_snapshot
5
7
  end
6
8
 
7
9
  end
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas