yahoo-group-data 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -11,6 +11,7 @@ g = YahooGroupData.new("http://tech.groups.yahoo.com/group/OneStopCOBOL/")
11
11
 
12
12
  name = g.name
13
13
  description = g.description
14
+ num_members = g.num_members
14
15
 
15
16
  p = g.post_email
16
17
  s = g.subscribe_email
@@ -18,6 +19,32 @@ u = g.unsubscribe_email
18
19
  o = g.owner_email
19
20
  ```
20
21
 
22
+ ### Available instance methods
23
+
24
+ These should be relatively self-explanatory. Where the data is unavailable (for instance the group name if no group was found), the return value will be nil.
25
+
26
+ #### Boolean values:
27
+
28
+ - not_found?
29
+ - private?
30
+ - age_restricted?
31
+
32
+ #### String values
33
+
34
+ - name
35
+ - description
36
+ - post_email
37
+ - subscribe_email
38
+ - owner_email
39
+ - unsubscribe_email
40
+ - language
41
+ - category
42
+
43
+ #### Other values
44
+
45
+ - num_members (Integer)
46
+ - founded (Date)
47
+
21
48
  ## Requirements
22
49
 
23
50
  It's tested with Ruby 1.9.3, it probably works with older versions.
@@ -36,14 +63,6 @@ Or install it yourself as:
36
63
 
37
64
  $ gem install yahoo-group-data
38
65
 
39
- ## TODO
40
-
41
- * Parse out
42
- * * Number of members
43
- * * Founded date
44
- * * Category
45
- * * Language
46
-
47
66
  ## Contributing
48
67
 
49
68
  1. Fork it
@@ -58,11 +77,11 @@ Or install it yourself as:
58
77
 
59
78
  Rather than distribute a load of Yahoo's HTML pages with the gem there's a rake task to get the ones that are needed. Run:
60
79
 
61
- `rake fetch_yahoo_pages`
80
+ $ rake fetch_yahoo_pages
62
81
 
63
82
  after that:
64
83
 
65
- rake test
84
+ $ rake test
66
85
 
67
86
  ### If you find a group the gem fails on
68
87
 
@@ -1,3 +1,3 @@
1
1
  class YahooGroupData
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -3,6 +3,7 @@ require 'yahoo-group-data/version'
3
3
  require 'curb'
4
4
  require 'uri'
5
5
  require 'nokogiri'
6
+ require 'date'
6
7
 
7
8
  class YahooGroupData
8
9
  def initialize(url)
@@ -11,11 +12,11 @@ class YahooGroupData
11
12
  curb = Curl::Easy.new(url)
12
13
  curb.follow_location = true
13
14
  curb.http_get
14
- @html = curb.body_str
15
+ @html = curb.body_str.force_encoding('iso-8859-1').encode("UTF-8")
15
16
  end
16
17
 
17
18
  def name
18
- doc.css('span.ygrp-pname').first.content
19
+ @name ||= no_data? ? nil : doc.css('span.ygrp-pname').first.content
19
20
  end
20
21
 
21
22
  def description
@@ -26,38 +27,89 @@ class YahooGroupData
26
27
  end
27
28
 
28
29
  def post_email
29
- subscribe_email.gsub("-subscribe@", "@")
30
+ @post_email ||= no_data? ? nil : subscribe_email.gsub("-subscribe@", "@")
30
31
  end
31
32
 
32
33
  def subscribe_email
33
- doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-subscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
34
+ @subscribe_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-subscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
34
35
  end
35
36
 
36
37
  def owner_email
37
- doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-owner@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
38
+ @owner_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-owner@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
38
39
  end
39
40
 
40
41
  def unsubscribe_email
41
- doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-unsubscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
42
+ @unsubscribe_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-unsubscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
42
43
  end
43
44
 
44
45
  def private?
45
46
  @private_group ||= (
46
- not_found_element = doc.xpath('/html/body/div[3]/center/p/big')
47
- not_found_element.size > 0 and not_found_element.first.content.strip.match(/Sorry, this group is available to members ONLY./i) ? true : false
48
- )
47
+ doc.xpath('/html/body/div[3]/center/p/big').size > 0 and
48
+ doc.xpath('/html/body/div[3]/center/p/big').first.content.strip.match(/Sorry, this group is available to members ONLY./i)
49
+ ) ? true : false
49
50
  end
50
51
 
51
- def defunct?
52
- @defunct ||= (
53
- not_found_element = doc.xpath('/html/body/div[3]/div/div/div/h3')
54
- not_found_element.size > 0 and not_found_element.first.content.strip.match(/Group Not Found|Group nicht gefunden/i) ? true : false
52
+ def not_found?
53
+ @not_found ||= (
54
+ (
55
+ doc.xpath('/html/body/div[3]/div/div/div/h3').size > 0 and
56
+ doc.xpath('/html/body/div[3]/div/div/div/h3').first.content.strip.match(/Group Not Found|Group nicht gefunden/i)
57
+ ) ? true : false
55
58
  )
56
59
  end
57
60
 
61
+ def age_restricted?
62
+ @age_restricted ||= (doc.xpath('/html/body/div[3]/div/div/div/h4').size > 0 and doc.xpath('/html/body/div[3]/div/div/div/h4').first.inner_html.strip.match(/You've reached an Age-Restricted Area/i)) ? true : false
63
+ end
64
+
65
+ def founded
66
+ @founded ||= no_data? ? nil : Date.parse(date_str_to_english(doc.xpath("//ul[@class=\"ygrp-ul ygrp-info\"]//li[#{has_category? ? 3 : 2}]").inner_html.split(':')[1].strip))
67
+ end
68
+
69
+ def language
70
+ @language ||= no_data? ? nil : doc.xpath("//ul[@class=\"ygrp-ul ygrp-info\"]//li[#{has_category? ? 4 : 3}]").inner_html.split(':')[1].strip
71
+ end
72
+
73
+ def num_members
74
+ Integer(doc.xpath('//ul[@class="ygrp-ul ygrp-info"]//li[1]').inner_html.split(':')[1].strip)
75
+ end
76
+
77
+ def category
78
+ return unless has_category?
79
+ doc.xpath('/html/body/div[3]/table/tr/td/div[2]/div[2]/div/ul/li[2]/a').inner_html
80
+ end
81
+
58
82
  private
59
83
 
60
- attr_reader :html, :doc, :defunct #, :private_group
84
+ def no_data?
85
+ private? or age_restricted? or not_found?
86
+ end
87
+
88
+ # French: jan,fév,mar,avr,mai,jun,jul,aoû,sep,oct,nov,déc
89
+ # German: jan,feb,mrz,apr,mai,jun,jul,aug,sep,okt,nov,dez
90
+ # Portuguese: jan,fev,mar,abr,mai,jun,jul,ago,set,out,nov,dez
91
+ # Spanish: ene,feb,mar,abr,may,jun,jul,ago,sep,oct,nov,dic
92
+ # Swedish: jan,feb,mar,apr,maj,jun,jul,aug,sep,okt,nov,dec
93
+ # US / UK: jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec
94
+
95
+ def date_str_to_english(date_str)
96
+ date_str.
97
+ gsub(/ene/i, "Jan").
98
+ gsub(/fév|fev/i, "Feb").
99
+ gsub(/mar|mrz/i, "Mar").
100
+ gsub(/avr|abr/i, "Apr").
101
+ gsub(/mai|maj/i, "May").
102
+ gsub(/aoû|ago/i, "Aug").
103
+ gsub(/set/i, "Sep").
104
+ gsub(/okt|out/i, "Oct").
105
+ gsub(/déc|dez|dic/i, "Dec")
106
+ end
107
+
108
+ attr_reader :html, :doc
109
+
110
+ def has_category?
111
+ doc.xpath('//ul[@class="ygrp-ul ygrp-info"]//li').count == 3 ? false : true
112
+ end
61
113
 
62
114
  def doc
63
115
  @doc ||= Nokogiri::HTML(html)
data/test/groups.yml CHANGED
@@ -1,40 +1,138 @@
1
1
  ---
2
2
  groups:
3
+ # Baseline group
3
4
  - id: OneStopCOBOL
4
5
  url: http://tech.groups.yahoo.com/group/OneStopCOBOL/
5
6
  name: OneStopCOBOL
6
7
  description: OneStopCOBOL - Official COBOL group
7
- defunct: false
8
+ not_found: false
9
+ private: false
10
+ age_restricted: false
11
+ founded: Jun 24, 2008
12
+ language: English
13
+ num_members: 151
14
+ category: COBOL
8
15
  post_email: OneStopCOBOL@yahoogroups.com
9
16
  subscribe_email: OneStopCOBOL-subscribe@yahoogroups.com
10
17
  owner_email: OneStopCOBOL-owner@yahoogroups.com
11
18
  unsubscribe_email: OneStopCOBOL-unsubscribe@yahoogroups.com
12
19
 
20
+ # nil description
13
21
  - id: Cambridge-Freegle
14
22
  url: http://groups.yahoo.com/group/Cambridge-Freegle/
15
23
  name: Cambridge-Freegle
16
24
  description:
17
- defunct: false
25
+ not_found: false
26
+ private: false
27
+ age_restricted: false
28
+ founded: Aug 24, 2011
29
+ language: English
30
+ num_members: 160
31
+ category: Recycling
18
32
  post_email: Cambridge-Freegle@yahoogroups.com
19
33
  subscribe_email: Cambridge-Freegle-subscribe@yahoogroups.com
20
34
  owner_email: Cambridge-Freegle-owner@yahoogroups.com
21
35
  unsubscribe_email: Cambridge-Freegle-unsubscribe@yahoogroups.com
22
36
 
37
+ # non-existent group
23
38
  - id: Freecycle_MV
24
39
  url: http://groups.yahoo.com/group/Freecycle_MV/
25
- defunct: true
40
+ name:
41
+ description:
42
+ not_found: true
43
+ private:
44
+ age_restricted:
45
+ founded:
46
+ language:
47
+ num_members:
48
+ category:
49
+ post_email:
50
+ subscribe_email:
51
+ owner_email:
52
+ unsubscribe_email:
26
53
 
54
+ # private group
27
55
  - id: blackpool-freecycle
28
56
  url: http://groups.yahoo.com/group/blackpool-freecycle/
29
- defunct: false
57
+ name: blackpool-freecycle
58
+ description:
59
+ not_found: false
30
60
  private: true
61
+ age_restricted:
62
+ founded:
63
+ language:
64
+ num_members:
65
+ category:
66
+ post_email:
67
+ subscribe_email:
68
+ owner_email:
69
+ unsubscribe_email:
70
+
71
+ # Age restricted group
72
+ - id: NapaCountyCAFreecycle
73
+ url: http://groups.yahoo.com/group/NapaCountyCAFreecycle
74
+ name:
75
+ description:
76
+ not_found: false
77
+ private:
78
+ age_restricted: true
79
+ founded:
80
+ language:
81
+ num_members:
82
+ category:
83
+ post_email:
84
+ subscribe_email:
85
+ owner_email:
86
+ unsubscribe_email:
31
87
 
88
+ # "Related Link:" in email address element
32
89
  - id: Dursleyfreecycle
33
90
  url: http://groups.yahoo.com/group/Dursleyfreecycle/
34
91
  name: DursleyFreecycle
35
92
  description: DursleyFreecycle(R)
36
- defunct: false
93
+ not_found: false
94
+ private: false
95
+ age_restricted: false
96
+ founded: Sep 13, 2009
97
+ language: English
98
+ num_members: 1
99
+ category: Recycling
37
100
  post_email: DursleyFreecycle@yahoogroups.co.uk
38
101
  subscribe_email: DursleyFreecycle-subscribe@yahoogroups.co.uk
39
102
  owner_email: DursleyFreecycle-owner@yahoogroups.co.uk
40
- unsubscribe_email: DursleyFreecycle-unsubscribe@yahoogroups.co.uk
103
+ unsubscribe_email: DursleyFreecycle-unsubscribe@yahoogroups.co.uk
104
+
105
+ # nil Category
106
+ - id: redcar-cleveland-freegle
107
+ url: http://groups.yahoo.com/group/redcar-cleveland-freegle/
108
+ name: redcar-cleveland-freegle
109
+ description: Redcar & Cleveland Freegle
110
+ not_found: false
111
+ private: false
112
+ age_restricted: false
113
+ founded: Mar 30, 2007
114
+ language: English
115
+ num_members: 2985
116
+ category:
117
+ post_email: redcar-cleveland-freegle@yahoogroups.com
118
+ subscribe_email: redcar-cleveland-freegle-subscribe@yahoogroups.com
119
+ owner_email: redcar-cleveland-freegle-owner@yahoogroups.com
120
+ unsubscribe_email: redcar-cleveland-freegle-unsubscribe@yahoogroups.com
121
+
122
+ # Foreign language, particularly for the date format
123
+ - id:
124
+ url: http://de.groups.yahoo.com/group/freecycle-michelstadt/
125
+ name: freecycle-michelstadt
126
+ description: Freecycle Michelstadt / Odenwald
127
+ not_found: false
128
+ private: false
129
+ age_restricted: false
130
+ founded: Mai 6, 2004
131
+ language: Deutsch
132
+ num_members: 63
133
+ category: Kostenlos
134
+ post_email: freecycle-michelstadt@yahoogroups.de
135
+ subscribe_email: freecycle-michelstadt-subscribe@yahoogroups.de
136
+ owner_email: freecycle-michelstadt-owner@yahoogroups.de
137
+ unsubscribe_email: freecycle-michelstadt-unsubscribe@yahoogroups.de
138
+
@@ -0,0 +1,13 @@
1
+ # encoding: utf-8
2
+ def date_str_to_english(date_str)
3
+ date_str.
4
+ gsub(/ene/i, "Jan").
5
+ gsub(/fév|fev/i, "Feb").
6
+ gsub(/mar|mrz/i, "Mar").
7
+ gsub(/avr|abr/i, "Apr").
8
+ gsub(/mai|maj/i, "May").
9
+ gsub(/aoû|ago/i, "Aug").
10
+ gsub(/set/i, "Sep").
11
+ gsub(/okt|out/i, "Oct").
12
+ gsub(/déc|dez|dic/i, "Dec")
13
+ end
@@ -1,6 +1,8 @@
1
1
  require 'test/unit'
2
+ require 'test_helper'
2
3
  require 'webmock/test_unit'
3
4
  require 'yahoo-group-data'
5
+ require 'date'
4
6
 
5
7
  class YahooGroupDataTest < Test::Unit::TestCase
6
8
  def test_initialize_with_invalid_params
@@ -42,15 +44,29 @@ class YahooGroupDataTest < Test::Unit::TestCase
42
44
  to_return(:status => 200, :body => File.read("test/yahoo_pages/#{g_data['id']}.html"), :headers => {})
43
45
 
44
46
  group = YahooGroupData.new(g_data["url"])
45
- unless g_data["defunct"] or g_data["private"]
47
+ if g_data["not_found"]
48
+ assert_equal g_data["not_found"], group.not_found?
49
+ elsif g_data["private"]
50
+ assert_equal g_data["not_found"], group.not_found?
51
+ assert_equal g_data["private"], group.private?
52
+ elsif g_data["age_restricted"]
53
+ assert_equal g_data["not_found"], group.not_found?
54
+ assert_equal g_data["age_restricted"], group.age_restricted?
55
+ else
56
+ assert_equal g_data["age_restricted"], group.age_restricted?
57
+ assert_equal g_data["private"], group.private?
58
+ assert_equal g_data["not_found"], group.not_found?
46
59
  assert_equal g_data["name"], group.name
47
60
  assert_equal g_data["description"], group.description
48
61
  assert_equal g_data["post_email"], group.post_email
49
62
  assert_equal g_data["subscribe_email"], group.subscribe_email
50
63
  assert_equal g_data["owner_email"], group.owner_email
51
64
  assert_equal g_data["unsubscribe_email"], group.unsubscribe_email
65
+ assert_equal Date.parse(date_str_to_english(g_data["founded"])), group.founded
66
+ assert_equal g_data["language"], group.language
67
+ assert_equal g_data["num_members"], group.num_members
68
+ assert_equal g_data["category"], group.category
52
69
  end
53
- assert_equal g_data["defunct"], group.defunct?
54
70
  end
55
71
  end
56
72
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yahoo-group-data
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-28 00:00:00.000000000 Z
12
+ date: 2012-01-29 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70185747420720 !ruby/object:Gem::Requirement
16
+ requirement: &70145628826300 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70185747420720
24
+ version_requirements: *70145628826300
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: curb
27
- requirement: &70185747420160 !ruby/object:Gem::Requirement
27
+ requirement: &70145628825740 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0.8'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70185747420160
35
+ version_requirements: *70145628825740
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: webmock
38
- requirement: &70185747419720 !ruby/object:Gem::Requirement
38
+ requirement: &70145628825320 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70185747419720
46
+ version_requirements: *70145628825320
47
47
  description: A lib to fetch public Yahoo group data
48
48
  email:
49
49
  - will@willj.net
@@ -59,6 +59,7 @@ files:
59
59
  - lib/yahoo-group-data.rb
60
60
  - lib/yahoo-group-data/version.rb
61
61
  - test/groups.yml
62
+ - test/test_helper.rb
62
63
  - test/test_yahoo_group_data.rb
63
64
  - test/yahoo_pages/.gitkeep
64
65
  - yahoo-group-data.gemspec
@@ -89,5 +90,6 @@ summary: A lib to fetch the publicly available Yahoo group data from a Yahoo gro
89
90
  page
90
91
  test_files:
91
92
  - test/groups.yml
93
+ - test/test_helper.rb
92
94
  - test/test_yahoo_group_data.rb
93
95
  - test/yahoo_pages/.gitkeep