yahoo-group-data 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +29 -10
- data/lib/yahoo-group-data/version.rb +1 -1
- data/lib/yahoo-group-data.rb +66 -14
- data/test/groups.yml +104 -6
- data/test/test_helper.rb +13 -0
- data/test/test_yahoo_group_data.rb +18 -2
- metadata +10 -8
data/README.md
CHANGED
@@ -11,6 +11,7 @@ g = YahooGroupData.new("http://tech.groups.yahoo.com/group/OneStopCOBOL/")
|
|
11
11
|
|
12
12
|
name = g.name
|
13
13
|
description = g.description
|
14
|
+
num_members = g.num_members
|
14
15
|
|
15
16
|
p = g.post_email
|
16
17
|
s = g.subscribe_email
|
@@ -18,6 +19,32 @@ u = g.unsubscribe_email
|
|
18
19
|
o = g.owner_email
|
19
20
|
```
|
20
21
|
|
22
|
+
### Available instance methods
|
23
|
+
|
24
|
+
These should be relatively self-explanatory. Where the data is unavailable (for instance, the group name if no group was found), the return value will be nil.
|
25
|
+
|
26
|
+
#### Boolean values:
|
27
|
+
|
28
|
+
- not_found?
|
29
|
+
- private?
|
30
|
+
- age_restricted?
|
31
|
+
|
32
|
+
#### String values
|
33
|
+
|
34
|
+
- name
|
35
|
+
- description
|
36
|
+
- post_email
|
37
|
+
- subscribe_email
|
38
|
+
- owner_email
|
39
|
+
- unsubscribe_email
|
40
|
+
- language
|
41
|
+
- category
|
42
|
+
|
43
|
+
#### Other values
|
44
|
+
|
45
|
+
- num_members (Integer)
|
46
|
+
- founded (Date)
|
47
|
+
|
21
48
|
## Requirements
|
22
49
|
|
23
50
|
It's tested with Ruby 1.9.3; it probably works with older versions.
|
@@ -36,14 +63,6 @@ Or install it yourself as:
|
|
36
63
|
|
37
64
|
$ gem install yahoo-group-data
|
38
65
|
|
39
|
-
## TODO
|
40
|
-
|
41
|
-
* Parse out
|
42
|
-
* * Number of members
|
43
|
-
* * Founded date
|
44
|
-
* * Category
|
45
|
-
* * Language
|
46
|
-
|
47
66
|
## Contributing
|
48
67
|
|
49
68
|
1. Fork it
|
@@ -58,11 +77,11 @@ Or install it yourself as:
|
|
58
77
|
|
59
78
|
Rather than distribute a load of Yahoo's HTML pages with the gem, there's a rake task to get the ones that are needed. Run:
|
60
79
|
|
61
|
-
|
80
|
+
$ rake fetch_yahoo_pages
|
62
81
|
|
63
82
|
after that:
|
64
83
|
|
65
|
-
rake test
|
84
|
+
$ rake test
|
66
85
|
|
67
86
|
### If you find a group the gem fails on
|
68
87
|
|
data/lib/yahoo-group-data.rb
CHANGED
@@ -3,6 +3,7 @@ require 'yahoo-group-data/version'
|
|
3
3
|
require 'curb'
|
4
4
|
require 'uri'
|
5
5
|
require 'nokogiri'
|
6
|
+
require 'date'
|
6
7
|
|
7
8
|
class YahooGroupData
|
8
9
|
def initialize(url)
|
@@ -11,11 +12,11 @@ class YahooGroupData
|
|
11
12
|
curb = Curl::Easy.new(url)
|
12
13
|
curb.follow_location = true
|
13
14
|
curb.http_get
|
14
|
-
@html = curb.body_str
|
15
|
+
@html = curb.body_str.force_encoding('iso-8859-1').encode("UTF-8")
|
15
16
|
end
|
16
17
|
|
17
18
|
def name
|
18
|
-
doc.css('span.ygrp-pname').first.content
|
19
|
+
@name ||= no_data? ? nil : doc.css('span.ygrp-pname').first.content
|
19
20
|
end
|
20
21
|
|
21
22
|
def description
|
@@ -26,38 +27,89 @@ class YahooGroupData
|
|
26
27
|
end
|
27
28
|
|
28
29
|
def post_email
|
29
|
-
subscribe_email.gsub("-subscribe@", "@")
|
30
|
+
@post_email ||= no_data? ? nil : subscribe_email.gsub("-subscribe@", "@")
|
30
31
|
end
|
31
32
|
|
32
33
|
def subscribe_email
|
33
|
-
doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-subscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
34
|
+
@subscribe_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-subscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
34
35
|
end
|
35
36
|
|
36
37
|
def owner_email
|
37
|
-
doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-owner@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
38
|
+
@owner_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-owner@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
38
39
|
end
|
39
40
|
|
40
41
|
def unsubscribe_email
|
41
|
-
doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-unsubscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
42
|
+
@unsubscribe_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-unsubscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
|
42
43
|
end
|
43
44
|
|
44
45
|
def private?
|
45
46
|
@private_group ||= (
|
46
|
-
|
47
|
-
|
48
|
-
)
|
47
|
+
doc.xpath('/html/body/div[3]/center/p/big').size > 0 and
|
48
|
+
doc.xpath('/html/body/div[3]/center/p/big').first.content.strip.match(/Sorry, this group is available to members ONLY./i)
|
49
|
+
) ? true : false
|
49
50
|
end
|
50
51
|
|
51
|
-
def
|
52
|
-
@
|
53
|
-
|
54
|
-
|
52
|
+
def not_found?
|
53
|
+
@not_found ||= (
|
54
|
+
(
|
55
|
+
doc.xpath('/html/body/div[3]/div/div/div/h3').size > 0 and
|
56
|
+
doc.xpath('/html/body/div[3]/div/div/div/h3').first.content.strip.match(/Group Not Found|Group nicht gefunden/i)
|
57
|
+
) ? true : false
|
55
58
|
)
|
56
59
|
end
|
57
60
|
|
61
|
+
def age_restricted?
|
62
|
+
@age_restricted ||= (doc.xpath('/html/body/div[3]/div/div/div/h4').size > 0 and doc.xpath('/html/body/div[3]/div/div/div/h4').first.inner_html.strip.match(/You've reached an Age-Restricted Area/i)) ? true : false
|
63
|
+
end
|
64
|
+
|
65
|
+
def founded
|
66
|
+
@founded ||= no_data? ? nil : Date.parse(date_str_to_english(doc.xpath("//ul[@class=\"ygrp-ul ygrp-info\"]//li[#{has_category? ? 3 : 2}]").inner_html.split(':')[1].strip))
|
67
|
+
end
|
68
|
+
|
69
|
+
def language
|
70
|
+
@language ||= no_data? ? nil : doc.xpath("//ul[@class=\"ygrp-ul ygrp-info\"]//li[#{has_category? ? 4 : 3}]").inner_html.split(':')[1].strip
|
71
|
+
end
|
72
|
+
|
73
|
+
def num_members
|
74
|
+
Integer(doc.xpath('//ul[@class="ygrp-ul ygrp-info"]//li[1]').inner_html.split(':')[1].strip)
|
75
|
+
end
|
76
|
+
|
77
|
+
def category
|
78
|
+
return unless has_category?
|
79
|
+
doc.xpath('/html/body/div[3]/table/tr/td/div[2]/div[2]/div/ul/li[2]/a').inner_html
|
80
|
+
end
|
81
|
+
|
58
82
|
private
|
59
83
|
|
60
|
-
|
84
|
+
def no_data?
|
85
|
+
private? or age_restricted? or not_found?
|
86
|
+
end
|
87
|
+
|
88
|
+
# French: jan,fév,mar,avr,mai,jun,jul,aoû,sep,oct,nov,déc
|
89
|
+
# German: jan,feb,mrz,apr,mai,jun,jul,aug,sep,okt,nov,dez
|
90
|
+
# Portuguese: jan,fev,mar,abr,mai,jun,jul,ago,set,out,nov,dez
|
91
|
+
# Spanish: ene,feb,mar,abr,may,jun,jul,ago,sep,oct,nov,dic
|
92
|
+
# Swedish: jan,feb,mar,apr,maj,jun,jul,aug,sep,okt,nov,dec
|
93
|
+
# US / UK: jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec
|
94
|
+
|
95
|
+
def date_str_to_english(date_str)
|
96
|
+
date_str.
|
97
|
+
gsub(/ene/i, "Jan").
|
98
|
+
gsub(/fév|fev/i, "Feb").
|
99
|
+
gsub(/mar|mrz/i, "Mar").
|
100
|
+
gsub(/avr|abr/i, "Apr").
|
101
|
+
gsub(/mai|maj/i, "May").
|
102
|
+
gsub(/aoû|ago/i, "Aug").
|
103
|
+
gsub(/set/i, "Sep").
|
104
|
+
gsub(/okt|out/i, "Oct").
|
105
|
+
gsub(/déc|dez|dic/i, "Dec")
|
106
|
+
end
|
107
|
+
|
108
|
+
attr_reader :html, :doc
|
109
|
+
|
110
|
+
def has_category?
|
111
|
+
doc.xpath('//ul[@class="ygrp-ul ygrp-info"]//li').count == 3 ? false : true
|
112
|
+
end
|
61
113
|
|
62
114
|
def doc
|
63
115
|
@doc ||= Nokogiri::HTML(html)
|
data/test/groups.yml
CHANGED
@@ -1,40 +1,138 @@
|
|
1
1
|
---
|
2
2
|
groups:
|
3
|
+
# Baseline group
|
3
4
|
- id: OneStopCOBOL
|
4
5
|
url: http://tech.groups.yahoo.com/group/OneStopCOBOL/
|
5
6
|
name: OneStopCOBOL
|
6
7
|
description: OneStopCOBOL - Official COBOL group
|
7
|
-
|
8
|
+
not_found: false
|
9
|
+
private: false
|
10
|
+
age_restricted: false
|
11
|
+
founded: Jun 24, 2008
|
12
|
+
language: English
|
13
|
+
num_members: 151
|
14
|
+
category: COBOL
|
8
15
|
post_email: OneStopCOBOL@yahoogroups.com
|
9
16
|
subscribe_email: OneStopCOBOL-subscribe@yahoogroups.com
|
10
17
|
owner_email: OneStopCOBOL-owner@yahoogroups.com
|
11
18
|
unsubscribe_email: OneStopCOBOL-unsubscribe@yahoogroups.com
|
12
19
|
|
20
|
+
# nil description
|
13
21
|
- id: Cambridge-Freegle
|
14
22
|
url: http://groups.yahoo.com/group/Cambridge-Freegle/
|
15
23
|
name: Cambridge-Freegle
|
16
24
|
description:
|
17
|
-
|
25
|
+
not_found: false
|
26
|
+
private: false
|
27
|
+
age_restricted: false
|
28
|
+
founded: Aug 24, 2011
|
29
|
+
language: English
|
30
|
+
num_members: 160
|
31
|
+
category: Recycling
|
18
32
|
post_email: Cambridge-Freegle@yahoogroups.com
|
19
33
|
subscribe_email: Cambridge-Freegle-subscribe@yahoogroups.com
|
20
34
|
owner_email: Cambridge-Freegle-owner@yahoogroups.com
|
21
35
|
unsubscribe_email: Cambridge-Freegle-unsubscribe@yahoogroups.com
|
22
36
|
|
37
|
+
# non-existent group
|
23
38
|
- id: Freecycle_MV
|
24
39
|
url: http://groups.yahoo.com/group/Freecycle_MV/
|
25
|
-
|
40
|
+
name:
|
41
|
+
description:
|
42
|
+
not_found: true
|
43
|
+
private:
|
44
|
+
age_restricted:
|
45
|
+
founded:
|
46
|
+
language:
|
47
|
+
num_members:
|
48
|
+
category:
|
49
|
+
post_email:
|
50
|
+
subscribe_email:
|
51
|
+
owner_email:
|
52
|
+
unsubscribe_email:
|
26
53
|
|
54
|
+
# private group
|
27
55
|
- id: blackpool-freecycle
|
28
56
|
url: http://groups.yahoo.com/group/blackpool-freecycle/
|
29
|
-
|
57
|
+
name: blackpool-freecycle
|
58
|
+
description:
|
59
|
+
not_found: false
|
30
60
|
private: true
|
61
|
+
age_restricted:
|
62
|
+
founded:
|
63
|
+
language:
|
64
|
+
num_members:
|
65
|
+
category:
|
66
|
+
post_email:
|
67
|
+
subscribe_email:
|
68
|
+
owner_email:
|
69
|
+
unsubscribe_email:
|
70
|
+
|
71
|
+
# Age restricted group
|
72
|
+
- id: NapaCountyCAFreecycle
|
73
|
+
url: http://groups.yahoo.com/group/NapaCountyCAFreecycle
|
74
|
+
name:
|
75
|
+
description:
|
76
|
+
not_found: false
|
77
|
+
private:
|
78
|
+
age_restricted: true
|
79
|
+
founded:
|
80
|
+
language:
|
81
|
+
num_members:
|
82
|
+
category:
|
83
|
+
post_email:
|
84
|
+
subscribe_email:
|
85
|
+
owner_email:
|
86
|
+
unsubscribe_email:
|
31
87
|
|
88
|
+
# "Related Link:" in email address element
|
32
89
|
- id: Dursleyfreecycle
|
33
90
|
url: http://groups.yahoo.com/group/Dursleyfreecycle/
|
34
91
|
name: DursleyFreecycle
|
35
92
|
description: DursleyFreecycle(R)
|
36
|
-
|
93
|
+
not_found: false
|
94
|
+
private: false
|
95
|
+
age_restricted: false
|
96
|
+
founded: Sep 13, 2009
|
97
|
+
language: English
|
98
|
+
num_members: 1
|
99
|
+
category: Recycling
|
37
100
|
post_email: DursleyFreecycle@yahoogroups.co.uk
|
38
101
|
subscribe_email: DursleyFreecycle-subscribe@yahoogroups.co.uk
|
39
102
|
owner_email: DursleyFreecycle-owner@yahoogroups.co.uk
|
40
|
-
unsubscribe_email: DursleyFreecycle-unsubscribe@yahoogroups.co.uk
|
103
|
+
unsubscribe_email: DursleyFreecycle-unsubscribe@yahoogroups.co.uk
|
104
|
+
|
105
|
+
# nil Category
|
106
|
+
- id: redcar-cleveland-freegle
|
107
|
+
url: http://groups.yahoo.com/group/redcar-cleveland-freegle/
|
108
|
+
name: redcar-cleveland-freegle
|
109
|
+
description: Redcar & Cleveland Freegle
|
110
|
+
not_found: false
|
111
|
+
private: false
|
112
|
+
age_restricted: false
|
113
|
+
founded: Mar 30, 2007
|
114
|
+
language: English
|
115
|
+
num_members: 2985
|
116
|
+
category:
|
117
|
+
post_email: redcar-cleveland-freegle@yahoogroups.com
|
118
|
+
subscribe_email: redcar-cleveland-freegle-subscribe@yahoogroups.com
|
119
|
+
owner_email: redcar-cleveland-freegle-owner@yahoogroups.com
|
120
|
+
unsubscribe_email: redcar-cleveland-freegle-unsubscribe@yahoogroups.com
|
121
|
+
|
122
|
+
# Foreign language, particularly for the date format
|
123
|
+
- id: freecycle-michelstadt
|
124
|
+
url: http://de.groups.yahoo.com/group/freecycle-michelstadt/
|
125
|
+
name: freecycle-michelstadt
|
126
|
+
description: Freecycle Michelstadt / Odenwald
|
127
|
+
not_found: false
|
128
|
+
private: false
|
129
|
+
age_restricted: false
|
130
|
+
founded: Mai 6, 2004
|
131
|
+
language: Deutsch
|
132
|
+
num_members: 63
|
133
|
+
category: Kostenlos
|
134
|
+
post_email: freecycle-michelstadt@yahoogroups.de
|
135
|
+
subscribe_email: freecycle-michelstadt-subscribe@yahoogroups.de
|
136
|
+
owner_email: freecycle-michelstadt-owner@yahoogroups.de
|
137
|
+
unsubscribe_email: freecycle-michelstadt-unsubscribe@yahoogroups.de
|
138
|
+
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
def date_str_to_english(date_str)
|
3
|
+
date_str.
|
4
|
+
gsub(/ene/i, "Jan").
|
5
|
+
gsub(/fév|fev/i, "Feb").
|
6
|
+
gsub(/mar|mrz/i, "Mar").
|
7
|
+
gsub(/avr|abr/i, "Apr").
|
8
|
+
gsub(/mai|maj/i, "May").
|
9
|
+
gsub(/aoû|ago/i, "Aug").
|
10
|
+
gsub(/set/i, "Sep").
|
11
|
+
gsub(/okt|out/i, "Oct").
|
12
|
+
gsub(/déc|dez|dic/i, "Dec")
|
13
|
+
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'test/unit'
|
2
|
+
require 'test_helper'
|
2
3
|
require 'webmock/test_unit'
|
3
4
|
require 'yahoo-group-data'
|
5
|
+
require 'date'
|
4
6
|
|
5
7
|
class YahooGroupDataTest < Test::Unit::TestCase
|
6
8
|
def test_initialize_with_invalid_params
|
@@ -42,15 +44,29 @@ class YahooGroupDataTest < Test::Unit::TestCase
|
|
42
44
|
to_return(:status => 200, :body => File.read("test/yahoo_pages/#{g_data['id']}.html"), :headers => {})
|
43
45
|
|
44
46
|
group = YahooGroupData.new(g_data["url"])
|
45
|
-
|
47
|
+
if g_data["not_found"]
|
48
|
+
assert_equal g_data["not_found"], group.not_found?
|
49
|
+
elsif g_data["private"]
|
50
|
+
assert_equal g_data["not_found"], group.not_found?
|
51
|
+
assert_equal g_data["private"], group.private?
|
52
|
+
elsif g_data["age_restricted"]
|
53
|
+
assert_equal g_data["not_found"], group.not_found?
|
54
|
+
assert_equal g_data["age_restricted"], group.age_restricted?
|
55
|
+
else
|
56
|
+
assert_equal g_data["age_restricted"], group.age_restricted?
|
57
|
+
assert_equal g_data["private"], group.private?
|
58
|
+
assert_equal g_data["not_found"], group.not_found?
|
46
59
|
assert_equal g_data["name"], group.name
|
47
60
|
assert_equal g_data["description"], group.description
|
48
61
|
assert_equal g_data["post_email"], group.post_email
|
49
62
|
assert_equal g_data["subscribe_email"], group.subscribe_email
|
50
63
|
assert_equal g_data["owner_email"], group.owner_email
|
51
64
|
assert_equal g_data["unsubscribe_email"], group.unsubscribe_email
|
65
|
+
assert_equal Date.parse(date_str_to_english(g_data["founded"])), group.founded
|
66
|
+
assert_equal g_data["language"], group.language
|
67
|
+
assert_equal g_data["num_members"], group.num_members
|
68
|
+
assert_equal g_data["category"], group.category
|
52
69
|
end
|
53
|
-
assert_equal g_data["defunct"], group.defunct?
|
54
70
|
end
|
55
71
|
end
|
56
72
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yahoo-group-data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-01-
|
12
|
+
date: 2012-01-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &70145628826300 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '1.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70145628826300
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: curb
|
27
|
-
requirement: &
|
27
|
+
requirement: &70145628825740 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0.8'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70145628825740
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: webmock
|
38
|
-
requirement: &
|
38
|
+
requirement: &70145628825320 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70145628825320
|
47
47
|
description: A lib to fetch public Yahoo group data
|
48
48
|
email:
|
49
49
|
- will@willj.net
|
@@ -59,6 +59,7 @@ files:
|
|
59
59
|
- lib/yahoo-group-data.rb
|
60
60
|
- lib/yahoo-group-data/version.rb
|
61
61
|
- test/groups.yml
|
62
|
+
- test/test_helper.rb
|
62
63
|
- test/test_yahoo_group_data.rb
|
63
64
|
- test/yahoo_pages/.gitkeep
|
64
65
|
- yahoo-group-data.gemspec
|
@@ -89,5 +90,6 @@ summary: A lib to fetch the publicly available Yahoo group data from a Yahoo gro
|
|
89
90
|
page
|
90
91
|
test_files:
|
91
92
|
- test/groups.yml
|
93
|
+
- test/test_helper.rb
|
92
94
|
- test/test_yahoo_group_data.rb
|
93
95
|
- test/yahoo_pages/.gitkeep
|