yahoo-group-data 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +29 -10
- data/lib/yahoo-group-data/version.rb +1 -1
- data/lib/yahoo-group-data.rb +66 -14
- data/test/groups.yml +104 -6
- data/test/test_helper.rb +13 -0
- data/test/test_yahoo_group_data.rb +18 -2
- metadata +10 -8
data/README.md
CHANGED
@@ -11,6 +11,7 @@ g = YahooGroupData.new("http://tech.groups.yahoo.com/group/OneStopCOBOL/")
|
|
11
11
|
|
12
12
|
name = g.name
|
13
13
|
description = g.description
|
14
|
+
num_members = g.num_members
|
14
15
|
|
15
16
|
p = g.post_email
|
16
17
|
s = g.subscribe_email
|
@@ -18,6 +19,32 @@ u = g.unsubscribe_email
|
|
18
19
|
o = g.owner_email
|
19
20
|
```
|
20
21
|
|
22
|
+
### Available instance methods
|
23
|
+
|
24
|
+
These should be relatively self-explanatory. Where the data is unavailable (for instance, the group name if no group was found), the return value will be nil.
|
25
|
+
|
26
|
+
#### Boolean values:
|
27
|
+
|
28
|
+
- not_found?
|
29
|
+
- private?
|
30
|
+
- age_restricted?
|
31
|
+
|
32
|
+
#### String values
|
33
|
+
|
34
|
+
- name
|
35
|
+
- description
|
36
|
+
- post_email
|
37
|
+
- subscribe_email
|
38
|
+
- owner_email
|
39
|
+
- unsubscribe_email
|
40
|
+
- language
|
41
|
+
- category
|
42
|
+
|
43
|
+
#### Other values
|
44
|
+
|
45
|
+
- num_members (Integer)
|
46
|
+
- founded (Date)
|
47
|
+
|
21
48
|
## Requirements
|
22
49
|
|
23
50
|
It's tested with Ruby 1.9.3; it probably works with older versions.
|
@@ -36,14 +63,6 @@ Or install it yourself as:
|
|
36
63
|
|
37
64
|
$ gem install yahoo-group-data
|
38
65
|
|
39
|
-
## TODO
|
40
|
-
|
41
|
-
* Parse out
|
42
|
-
* * Number of members
|
43
|
-
* * Founded date
|
44
|
-
* * Category
|
45
|
-
* * Language
|
46
|
-
|
47
66
|
## Contributing
|
48
67
|
|
49
68
|
1. Fork it
|
@@ -58,11 +77,11 @@ Or install it yourself as:
|
|
58
77
|
|
59
78
|
Rather than distribute a load of Yahoo's HTML pages with the gem, there's a rake task to get the ones that are needed. Run:
|
60
79
|
|
61
|
-
|
80
|
+
$ rake fetch_yahoo_pages
|
62
81
|
|
63
82
|
after that:
|
64
83
|
|
65
|
-
rake test
|
84
|
+
$ rake test
|
66
85
|
|
67
86
|
### If you find a group the gem fails on
|
68
87
|
|
data/lib/yahoo-group-data.rb
CHANGED
@@ -3,6 +3,7 @@ require 'yahoo-group-data/version'
|
|
3
3
|
require 'curb'
|
4
4
|
require 'uri'
|
5
5
|
require 'nokogiri'
|
6
|
+
require 'date'
|
6
7
|
|
7
8
|
class YahooGroupData
|
8
9
|
def initialize(url)
|
@@ -11,11 +12,11 @@ class YahooGroupData
|
|
11
12
|
curb = Curl::Easy.new(url)
|
12
13
|
curb.follow_location = true
|
13
14
|
curb.http_get
|
14
|
-
@html = curb.body_str
|
15
|
+
@html = curb.body_str.force_encoding('iso-8859-1').encode("UTF-8")
|
15
16
|
end
|
16
17
|
|
17
18
|
def name
|
18
|
-
doc.css('span.ygrp-pname').first.content
|
19
|
+
@name ||= no_data? ? nil : doc.css('span.ygrp-pname').first.content
|
19
20
|
end
|
20
21
|
|
21
22
|
def description
|
@@ -26,38 +27,89 @@ class YahooGroupData
|
|
26
27
|
end
|
27
28
|
|
28
29
|
def post_email
|
29
|
-
subscribe_email.gsub("-subscribe@", "@")
|
30
|
+
@post_email ||= no_data? ? nil : subscribe_email.gsub("-subscribe@", "@")
|
30
31
|
end
|
31
32
|
|
32
33
|
def subscribe_email
|
33
|
-
doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-subscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
34
|
+
@subscribe_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-subscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
34
35
|
end
|
35
36
|
|
36
37
|
def owner_email
|
37
|
-
doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-owner@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
38
|
+
@owner_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-owner@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
38
39
|
end
|
39
40
|
|
40
41
|
def unsubscribe_email
|
41
|
-
doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-unsubscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
42
|
+
@unsubscribe_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-unsubscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
42
43
|
end
|
43
44
|
|
44
45
|
def private?
|
45
46
|
@private_group ||= (
|
46
|
-
|
47
|
-
|
48
|
-
)
|
47
|
+
doc.xpath('/html/body/div[3]/center/p/big').size > 0 and
|
48
|
+
doc.xpath('/html/body/div[3]/center/p/big').first.content.strip.match(/Sorry, this group is available to members ONLY./i)
|
49
|
+
) ? true : false
|
49
50
|
end
|
50
51
|
|
51
|
-
def
|
52
|
-
@
|
53
|
-
|
54
|
-
|
52
|
+
def not_found?
|
53
|
+
@not_found ||= (
|
54
|
+
(
|
55
|
+
doc.xpath('/html/body/div[3]/div/div/div/h3').size > 0 and
|
56
|
+
doc.xpath('/html/body/div[3]/div/div/div/h3').first.content.strip.match(/Group Not Found|Group nicht gefunden/i)
|
57
|
+
) ? true : false
|
55
58
|
)
|
56
59
|
end
|
57
60
|
|
61
|
+
def age_restricted?
|
62
|
+
@age_restricted ||= (doc.xpath('/html/body/div[3]/div/div/div/h4').size > 0 and doc.xpath('/html/body/div[3]/div/div/div/h4').first.inner_html.strip.match(/You've reached an Age-Restricted Area/i)) ? true : false
|
63
|
+
end
|
64
|
+
|
65
|
+
def founded
|
66
|
+
@founded ||= no_data? ? nil : Date.parse(date_str_to_english(doc.xpath("//ul[@class=\"ygrp-ul ygrp-info\"]//li[#{has_category? ? 3 : 2}]").inner_html.split(':')[1].strip))
|
67
|
+
end
|
68
|
+
|
69
|
+
def language
|
70
|
+
@language ||= no_data? ? nil : doc.xpath("//ul[@class=\"ygrp-ul ygrp-info\"]//li[#{has_category? ? 4 : 3}]").inner_html.split(':')[1].strip
|
71
|
+
end
|
72
|
+
|
73
|
+
def num_members
|
74
|
+
Integer(doc.xpath('//ul[@class="ygrp-ul ygrp-info"]//li[1]').inner_html.split(':')[1].strip)
|
75
|
+
end
|
76
|
+
|
77
|
+
def category
|
78
|
+
return unless has_category?
|
79
|
+
doc.xpath('/html/body/div[3]/table/tr/td/div[2]/div[2]/div/ul/li[2]/a').inner_html
|
80
|
+
end
|
81
|
+
|
58
82
|
private
|
59
83
|
|
60
|
-
|
84
|
+
def no_data?
|
85
|
+
private? or age_restricted? or not_found?
|
86
|
+
end
|
87
|
+
|
88
|
+
# French: jan,fév,mar,avr,mai,jun,jul,aoû,sep,oct,nov,déc
|
89
|
+
# German: jan,feb,mrz,apr,mai,jun,jul,aug,sep,okt,nov,dez
|
90
|
+
# Portuguese: jan,fev,mar,abr,mai,jun,jul,ago,set,out,nov,dez
|
91
|
+
# Spanish: ene,feb,mar,abr,may,jun,jul,ago,sep,oct,nov,dic
|
92
|
+
# Swedish: jan,feb,mar,apr,maj,jun,jul,aug,sep,okt,nov,dec
|
93
|
+
# US / UK: jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec
|
94
|
+
|
95
|
+
def date_str_to_english(date_str)
|
96
|
+
date_str.
|
97
|
+
gsub(/ene/i, "Jan").
|
98
|
+
gsub(/fév|fev/i, "Feb").
|
99
|
+
gsub(/mar|mrz/i, "Mar").
|
100
|
+
gsub(/avr|abr/i, "Apr").
|
101
|
+
gsub(/mai|maj/i, "May").
|
102
|
+
gsub(/aoû|ago/i, "Aug").
|
103
|
+
gsub(/set/i, "Sep").
|
104
|
+
gsub(/okt|out/i, "Oct").
|
105
|
+
gsub(/déc|dez|dic/i, "Dec")
|
106
|
+
end
|
107
|
+
|
108
|
+
attr_reader :html, :doc
|
109
|
+
|
110
|
+
def has_category?
|
111
|
+
doc.xpath('//ul[@class="ygrp-ul ygrp-info"]//li').count == 3 ? false : true
|
112
|
+
end
|
61
113
|
|
62
114
|
def doc
|
63
115
|
@doc ||= Nokogiri::HTML(html)
|
data/test/groups.yml
CHANGED
@@ -1,40 +1,138 @@
|
|
1
1
|
---
|
2
2
|
groups:
|
3
|
+
# Baseline group
|
3
4
|
- id: OneStopCOBOL
|
4
5
|
url: http://tech.groups.yahoo.com/group/OneStopCOBOL/
|
5
6
|
name: OneStopCOBOL
|
6
7
|
description: OneStopCOBOL - Official COBOL group
|
7
|
-
|
8
|
+
not_found: false
|
9
|
+
private: false
|
10
|
+
age_restricted: false
|
11
|
+
founded: Jun 24, 2008
|
12
|
+
language: English
|
13
|
+
num_members: 151
|
14
|
+
category: COBOL
|
8
15
|
post_email: OneStopCOBOL@yahoogroups.com
|
9
16
|
subscribe_email: OneStopCOBOL-subscribe@yahoogroups.com
|
10
17
|
owner_email: OneStopCOBOL-owner@yahoogroups.com
|
11
18
|
unsubscribe_email: OneStopCOBOL-unsubscribe@yahoogroups.com
|
12
19
|
|
20
|
+
# nil description
|
13
21
|
- id: Cambridge-Freegle
|
14
22
|
url: http://groups.yahoo.com/group/Cambridge-Freegle/
|
15
23
|
name: Cambridge-Freegle
|
16
24
|
description:
|
17
|
-
|
25
|
+
not_found: false
|
26
|
+
private: false
|
27
|
+
age_restricted: false
|
28
|
+
founded: Aug 24, 2011
|
29
|
+
language: English
|
30
|
+
num_members: 160
|
31
|
+
category: Recycling
|
18
32
|
post_email: Cambridge-Freegle@yahoogroups.com
|
19
33
|
subscribe_email: Cambridge-Freegle-subscribe@yahoogroups.com
|
20
34
|
owner_email: Cambridge-Freegle-owner@yahoogroups.com
|
21
35
|
unsubscribe_email: Cambridge-Freegle-unsubscribe@yahoogroups.com
|
22
36
|
|
37
|
+
# non-existent group
|
23
38
|
- id: Freecycle_MV
|
24
39
|
url: http://groups.yahoo.com/group/Freecycle_MV/
|
25
|
-
|
40
|
+
name:
|
41
|
+
description:
|
42
|
+
not_found: true
|
43
|
+
private:
|
44
|
+
age_restricted:
|
45
|
+
founded:
|
46
|
+
language:
|
47
|
+
num_members:
|
48
|
+
category:
|
49
|
+
post_email:
|
50
|
+
subscribe_email:
|
51
|
+
owner_email:
|
52
|
+
unsubscribe_email:
|
26
53
|
|
54
|
+
# private group
|
27
55
|
- id: blackpool-freecycle
|
28
56
|
url: http://groups.yahoo.com/group/blackpool-freecycle/
|
29
|
-
|
57
|
+
name: blackpool-freecycle
|
58
|
+
description:
|
59
|
+
not_found: false
|
30
60
|
private: true
|
61
|
+
age_restricted:
|
62
|
+
founded:
|
63
|
+
language:
|
64
|
+
num_members:
|
65
|
+
category:
|
66
|
+
post_email:
|
67
|
+
subscribe_email:
|
68
|
+
owner_email:
|
69
|
+
unsubscribe_email:
|
70
|
+
|
71
|
+
# Age restricted group
|
72
|
+
- id: NapaCountyCAFreecycle
|
73
|
+
url: http://groups.yahoo.com/group/NapaCountyCAFreecycle
|
74
|
+
name:
|
75
|
+
description:
|
76
|
+
not_found: false
|
77
|
+
private:
|
78
|
+
age_restricted: true
|
79
|
+
founded:
|
80
|
+
language:
|
81
|
+
num_members:
|
82
|
+
category:
|
83
|
+
post_email:
|
84
|
+
subscribe_email:
|
85
|
+
owner_email:
|
86
|
+
unsubscribe_email:
|
31
87
|
|
88
|
+
# "Related Link:" in email address element
|
32
89
|
- id: Dursleyfreecycle
|
33
90
|
url: http://groups.yahoo.com/group/Dursleyfreecycle/
|
34
91
|
name: DursleyFreecycle
|
35
92
|
description: DursleyFreecycle(R)
|
36
|
-
|
93
|
+
not_found: false
|
94
|
+
private: false
|
95
|
+
age_restricted: false
|
96
|
+
founded: Sep 13, 2009
|
97
|
+
language: English
|
98
|
+
num_members: 1
|
99
|
+
category: Recycling
|
37
100
|
post_email: DursleyFreecycle@yahoogroups.co.uk
|
38
101
|
subscribe_email: DursleyFreecycle-subscribe@yahoogroups.co.uk
|
39
102
|
owner_email: DursleyFreecycle-owner@yahoogroups.co.uk
|
40
|
-
unsubscribe_email: DursleyFreecycle-unsubscribe@yahoogroups.co.uk
|
103
|
+
unsubscribe_email: DursleyFreecycle-unsubscribe@yahoogroups.co.uk
|
104
|
+
|
105
|
+
# nil Category
|
106
|
+
- id: redcar-cleveland-freegle
|
107
|
+
url: http://groups.yahoo.com/group/redcar-cleveland-freegle/
|
108
|
+
name: redcar-cleveland-freegle
|
109
|
+
description: Redcar & Cleveland Freegle
|
110
|
+
not_found: false
|
111
|
+
private: false
|
112
|
+
age_restricted: false
|
113
|
+
founded: Mar 30, 2007
|
114
|
+
language: English
|
115
|
+
num_members: 2985
|
116
|
+
category:
|
117
|
+
post_email: redcar-cleveland-freegle@yahoogroups.com
|
118
|
+
subscribe_email: redcar-cleveland-freegle-subscribe@yahoogroups.com
|
119
|
+
owner_email: redcar-cleveland-freegle-owner@yahoogroups.com
|
120
|
+
unsubscribe_email: redcar-cleveland-freegle-unsubscribe@yahoogroups.com
|
121
|
+
|
122
|
+
# Foreign language, particularly for the date format
|
123
|
+
- id:
|
124
|
+
url: http://de.groups.yahoo.com/group/freecycle-michelstadt/
|
125
|
+
name: freecycle-michelstadt
|
126
|
+
description: Freecycle Michelstadt / Odenwald
|
127
|
+
not_found: false
|
128
|
+
private: false
|
129
|
+
age_restricted: false
|
130
|
+
founded: Mai 6, 2004
|
131
|
+
language: Deutsch
|
132
|
+
num_members: 63
|
133
|
+
category: Kostenlos
|
134
|
+
post_email: freecycle-michelstadt@yahoogroups.de
|
135
|
+
subscribe_email: freecycle-michelstadt-subscribe@yahoogroups.de
|
136
|
+
owner_email: freecycle-michelstadt-owner@yahoogroups.de
|
137
|
+
unsubscribe_email: freecycle-michelstadt-unsubscribe@yahoogroups.de
|
138
|
+
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
def date_str_to_english(date_str)
|
3
|
+
date_str.
|
4
|
+
gsub(/ene/i, "Jan").
|
5
|
+
gsub(/fév|fev/i, "Feb").
|
6
|
+
gsub(/mar|mrz/i, "Mar").
|
7
|
+
gsub(/avr|abr/i, "Apr").
|
8
|
+
gsub(/mai|maj/i, "May").
|
9
|
+
gsub(/aoû|ago/i, "Aug").
|
10
|
+
gsub(/set/i, "Sep").
|
11
|
+
gsub(/okt|out/i, "Oct").
|
12
|
+
gsub(/déc|dez|dic/i, "Dec")
|
13
|
+
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'test/unit'
|
2
|
+
require 'test_helper'
|
2
3
|
require 'webmock/test_unit'
|
3
4
|
require 'yahoo-group-data'
|
5
|
+
require 'date'
|
4
6
|
|
5
7
|
class YahooGroupDataTest < Test::Unit::TestCase
|
6
8
|
def test_initialize_with_invalid_params
|
@@ -42,15 +44,29 @@ class YahooGroupDataTest < Test::Unit::TestCase
|
|
42
44
|
to_return(:status => 200, :body => File.read("test/yahoo_pages/#{g_data['id']}.html"), :headers => {})
|
43
45
|
|
44
46
|
group = YahooGroupData.new(g_data["url"])
|
45
|
-
|
47
|
+
if g_data["not_found"]
|
48
|
+
assert_equal g_data["not_found"], group.not_found?
|
49
|
+
elsif g_data["private"]
|
50
|
+
assert_equal g_data["not_found"], group.not_found?
|
51
|
+
assert_equal g_data["private"], group.private?
|
52
|
+
elsif g_data["age_restricted"]
|
53
|
+
assert_equal g_data["not_found"], group.not_found?
|
54
|
+
assert_equal g_data["age_restricted"], group.age_restricted?
|
55
|
+
else
|
56
|
+
assert_equal g_data["age_restricted"], group.age_restricted?
|
57
|
+
assert_equal g_data["private"], group.private?
|
58
|
+
assert_equal g_data["not_found"], group.not_found?
|
46
59
|
assert_equal g_data["name"], group.name
|
47
60
|
assert_equal g_data["description"], group.description
|
48
61
|
assert_equal g_data["post_email"], group.post_email
|
49
62
|
assert_equal g_data["subscribe_email"], group.subscribe_email
|
50
63
|
assert_equal g_data["owner_email"], group.owner_email
|
51
64
|
assert_equal g_data["unsubscribe_email"], group.unsubscribe_email
|
65
|
+
assert_equal Date.parse(date_str_to_english(g_data["founded"])), group.founded
|
66
|
+
assert_equal g_data["language"], group.language
|
67
|
+
assert_equal g_data["num_members"], group.num_members
|
68
|
+
assert_equal g_data["category"], group.category
|
52
69
|
end
|
53
|
-
assert_equal g_data["defunct"], group.defunct?
|
54
70
|
end
|
55
71
|
end
|
56
72
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yahoo-group-data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-01-
|
12
|
+
date: 2012-01-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &70145628826300 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '1.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70145628826300
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: curb
|
27
|
-
requirement: &
|
27
|
+
requirement: &70145628825740 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0.8'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70145628825740
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: webmock
|
38
|
-
requirement: &
|
38
|
+
requirement: &70145628825320 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70145628825320
|
47
47
|
description: A lib to fetch public Yahoo group data
|
48
48
|
email:
|
49
49
|
- will@willj.net
|
@@ -59,6 +59,7 @@ files:
|
|
59
59
|
- lib/yahoo-group-data.rb
|
60
60
|
- lib/yahoo-group-data/version.rb
|
61
61
|
- test/groups.yml
|
62
|
+
- test/test_helper.rb
|
62
63
|
- test/test_yahoo_group_data.rb
|
63
64
|
- test/yahoo_pages/.gitkeep
|
64
65
|
- yahoo-group-data.gemspec
|
@@ -89,5 +90,6 @@ summary: A lib to fetch the publicly available Yahoo group data from a Yahoo gro
|
|
89
90
|
page
|
90
91
|
test_files:
|
91
92
|
- test/groups.yml
|
93
|
+
- test/test_helper.rb
|
92
94
|
- test/test_yahoo_group_data.rb
|
93
95
|
- test/yahoo_pages/.gitkeep
|