hyper_miner 0.0.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/hyper_miner.rb +14 -18
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81a7aad5a9626e326548bce79bf17587ed86b7bd
|
4
|
+
data.tar.gz: c019af5a5d61ea26edbf9e675eb0ed52eea47058
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b8c9d9540755a485fb5addd3bfd2b355f059af4b757e66addbba894a740dd397d251457af6f2a04ce89a60abdfdfedcd178d44d71779fec2bd4d205da0afed1
|
7
|
+
data.tar.gz: e78acf331295e656fbf3047165babdb716c5d58c8fc23e90d85550c0ea4bd8ae11c84393e0a24304f751214ced1f074c0a14545b50273f6934edef732a4d6ca4
|
data/lib/hyper_miner.rb
CHANGED
@@ -1,33 +1,29 @@
|
|
1
1
|
require 'open-uri'
|
2
2
|
require 'nokogiri'
|
3
|
-
|
3
|
+
|
4
|
+
require 'mine_plan.rb'
|
4
5
|
|
5
6
|
class HyperMiner
|
6
7
|
|
7
|
-
def initialize(
|
8
|
-
@
|
9
|
-
@instructions = YAML.load_file(instructions)
|
8
|
+
def initialize(resource_url)
|
9
|
+
@resource_url = resource_url
|
10
10
|
end
|
11
11
|
|
12
|
-
def mine
|
13
|
-
|
14
|
-
|
15
|
-
parse(html)
|
16
|
-
end
|
12
|
+
def mine(&block)
|
13
|
+
raise "Mine instructions must be provided." unless block_given?
|
17
14
|
|
18
|
-
|
15
|
+
html = get_resource_html
|
19
16
|
|
20
|
-
|
21
|
-
|
17
|
+
mine_plan = MinePlan.new(html, &block)
|
18
|
+
mined_data = mine_plan.execute
|
22
19
|
|
23
|
-
|
24
|
-
|
25
|
-
selector = instruction[1]['selector']
|
20
|
+
mined_data
|
21
|
+
end
|
26
22
|
|
27
|
-
|
28
|
-
end
|
23
|
+
private
|
29
24
|
|
30
|
-
|
25
|
+
def get_resource_html
|
26
|
+
open(@resource_url).read
|
31
27
|
end
|
32
28
|
|
33
29
|
end
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hyper_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Deyan Dobrinov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-03 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description:
|
13
|
+
description: A simple web data mining library.
|
14
14
|
email: deyan.dobrinov@gmail.com
|
15
15
|
executables: []
|
16
16
|
extensions: []
|