yahoo-group-data 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -11,6 +11,7 @@ g = YahooGroupData.new("http://tech.groups.yahoo.com/group/OneStopCOBOL/")
11
11
 
12
12
  name = g.name
13
13
  description = g.description
14
+ num_members = g.num_members
14
15
 
15
16
  p = g.post_email
16
17
  s = g.subscribe_email
@@ -18,6 +19,32 @@ u = g.unsubscribe_email
18
19
  o = g.unsubscribe_email
19
20
  ```
20
21
 
22
+ ### Available instance methods
23
+
24
+ These should be relatively self-explanatory. Where the data is unavailable (for instance, the group name if no group was found), the return value will be nil.
25
+
26
+ #### Boolean values:
27
+
28
+ - not_found?
29
+ - private?
30
+ - age_restricted?
31
+
32
+ #### String values
33
+
34
+ - name
35
+ - description
36
+ - post_email
37
+ - subscribe_email
38
+ - owner_email
39
+ - unsubscribe_email
40
+ - language
41
+ - category
42
+
43
+ #### Other values
44
+
45
+ - num_members (Integer)
46
+ - founded (Date)
47
+
21
48
  ## Requirements
22
49
 
23
50
  It's tested with Ruby 1.9.3; it probably works with older versions.
@@ -36,14 +63,6 @@ Or install it yourself as:
36
63
 
37
64
  $ gem install yahoo-group-data
38
65
 
39
- ## TODO
40
-
41
- * Parse out
42
- * * Number of members
43
- * * Founded date
44
- * * Category
45
- * * Language
46
-
47
66
  ## Contributing
48
67
 
49
68
  1. Fork it
@@ -58,11 +77,11 @@ Or install it yourself as:
58
77
 
59
78
  Rather than distribute a load of Yahoo's HTML pages with the gem there's a rake task to get the ones that are needed. Run:
60
79
 
61
- `rake fetch_yahoo_pages`
80
+ $ rake fetch_yahoo_pages
62
81
 
63
82
  after that:
64
83
 
65
- rake test
84
+ $ rake test
66
85
 
67
86
  ### If you find a group the gem fails on
68
87
 
@@ -1,3 +1,3 @@
1
1
  class YahooGroupData
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -3,6 +3,7 @@ require 'yahoo-group-data/version'
3
3
  require 'curb'
4
4
  require 'uri'
5
5
  require 'nokogiri'
6
+ require 'date'
6
7
 
7
8
  class YahooGroupData
8
9
  def initialize(url)
@@ -11,11 +12,11 @@ class YahooGroupData
11
12
  curb = Curl::Easy.new(url)
12
13
  curb.follow_location = true
13
14
  curb.http_get
14
- @html = curb.body_str
15
+ @html = curb.body_str.force_encoding('iso-8859-1').encode("UTF-8")
15
16
  end
16
17
 
17
18
  def name
18
- doc.css('span.ygrp-pname').first.content
19
+ @name ||= no_data? ? nil : doc.css('span.ygrp-pname').first.content
19
20
  end
20
21
 
21
22
  def description
@@ -26,38 +27,89 @@ class YahooGroupData
26
27
  end
27
28
 
28
29
  def post_email
29
- subscribe_email.gsub("-subscribe@", "@")
30
+ @post_email ||= no_data? ? nil : subscribe_email.gsub("-subscribe@", "@")
30
31
  end
31
32
 
32
33
  def subscribe_email
33
- doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-subscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
34
+ @subscribe_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-subscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
34
35
  end
35
36
 
36
37
  def owner_email
37
- doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-owner@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
38
+ @owner_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-owner@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
38
39
  end
39
40
 
40
41
  def unsubscribe_email
41
- doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-unsubscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
42
+ @unsubscribe_email ||= no_data? ? nil : doc.css('div#ygrp-links div.ygrp-contentblock').first.content.match(/(\S*-unsubscribe@[a-z]*yahoo[a-z]*\.[a-z\.]+)/)[1]
42
43
  end
43
44
 
44
45
  def private?
45
46
  @private_group ||= (
46
- not_found_element = doc.xpath('/html/body/div[3]/center/p/big')
47
- not_found_element.size > 0 and not_found_element.first.content.strip.match(/Sorry, this group is available to members ONLY./i) ? true : false
48
- )
47
+ doc.xpath('/html/body/div[3]/center/p/big').size > 0 and
48
+ doc.xpath('/html/body/div[3]/center/p/big').first.content.strip.match(/Sorry, this group is available to members ONLY./i)
49
+ ) ? true : false
49
50
  end
50
51
 
51
- def defunct?
52
- @defunct ||= (
53
- not_found_element = doc.xpath('/html/body/div[3]/div/div/div/h3')
54
- not_found_element.size > 0 and not_found_element.first.content.strip.match(/Group Not Found|Group nicht gefunden/i) ? true : false
52
+ def not_found?
53
+ @not_found ||= (
54
+ (
55
+ doc.xpath('/html/body/div[3]/div/div/div/h3').size > 0 and
56
+ doc.xpath('/html/body/div[3]/div/div/div/h3').first.content.strip.match(/Group Not Found|Group nicht gefunden/i)
57
+ ) ? true : false
55
58
  )
56
59
  end
57
60
 
61
+ def age_restricted?
62
+ @age_restricted ||= (doc.xpath('/html/body/div[3]/div/div/div/h4').size > 0 and doc.xpath('/html/body/div[3]/div/div/div/h4').first.inner_html.strip.match(/You've reached an Age-Restricted Area/i)) ? true : false
63
+ end
64
+
65
+ def founded
66
+ @founded ||= no_data? ? nil : Date.parse(date_str_to_english(doc.xpath("//ul[@class=\"ygrp-ul ygrp-info\"]//li[#{has_category? ? 3 : 2}]").inner_html.split(':')[1].strip))
67
+ end
68
+
69
+ def language
70
+ @language ||= no_data? ? nil : doc.xpath("//ul[@class=\"ygrp-ul ygrp-info\"]//li[#{has_category? ? 4 : 3}]").inner_html.split(':')[1].strip
71
+ end
72
+
73
+ def num_members
74
+ Integer(doc.xpath('//ul[@class="ygrp-ul ygrp-info"]//li[1]').inner_html.split(':')[1].strip)
75
+ end
76
+
77
+ def category
78
+ return unless has_category?
79
+ doc.xpath('/html/body/div[3]/table/tr/td/div[2]/div[2]/div/ul/li[2]/a').inner_html
80
+ end
81
+
58
82
  private
59
83
 
60
- attr_reader :html, :doc, :defunct #, :private_group
84
+ def no_data?
85
+ private? or age_restricted? or not_found?
86
+ end
87
+
88
+ # French: jan,fév,mar,avr,mai,jun,jul,aoû,sep,oct,nov,déc
89
+ # German: jan,feb,mrz,apr,mai,jun,jul,aug,sep,okt,nov,dez
90
+ # Portuguese: jan,fev,mar,abr,mai,jun,jul,ago,set,out,nov,dez
91
+ # Spanish: ene,feb,mar,abr,may,jun,jul,ago,sep,oct,nov,dic
92
+ # Swedish: jan,feb,mar,apr,maj,jun,jul,aug,sep,okt,nov,dec
93
+ # US / UK: jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec
94
+
95
+ def date_str_to_english(date_str)
96
+ date_str.
97
+ gsub(/ene/i, "Jan").
98
+ gsub(/fév|fev/i, "Feb").
99
+ gsub(/mar|mrz/i, "Mar").
100
+ gsub(/avr|abr/i, "Apr").
101
+ gsub(/mai|maj/i, "May").
102
+ gsub(/aoû|ago/i, "Aug").
103
+ gsub(/set/i, "Sep").
104
+ gsub(/okt|out/i, "Oct").
105
+ gsub(/déc|dez|dic/i, "Dec")
106
+ end
107
+
108
+ attr_reader :html, :doc
109
+
110
+ def has_category?
111
+ doc.xpath('//ul[@class="ygrp-ul ygrp-info"]//li').count == 3 ? false : true
112
+ end
61
113
 
62
114
  def doc
63
115
  @doc ||= Nokogiri::HTML(html)
data/test/groups.yml CHANGED
@@ -1,40 +1,138 @@
1
1
  ---
2
2
  groups:
3
+ # Baseline group
3
4
  - id: OneStopCOBOL
4
5
  url: http://tech.groups.yahoo.com/group/OneStopCOBOL/
5
6
  name: OneStopCOBOL
6
7
  description: OneStopCOBOL - Official COBOL group
7
- defunct: false
8
+ not_found: false
9
+ private: false
10
+ age_restricted: false
11
+ founded: Jun 24, 2008
12
+ language: English
13
+ num_members: 151
14
+ category: COBOL
8
15
  post_email: OneStopCOBOL@yahoogroups.com
9
16
  subscribe_email: OneStopCOBOL-subscribe@yahoogroups.com
10
17
  owner_email: OneStopCOBOL-owner@yahoogroups.com
11
18
  unsubscribe_email: OneStopCOBOL-unsubscribe@yahoogroups.com
12
19
 
20
+ # nil description
13
21
  - id: Cambridge-Freegle
14
22
  url: http://groups.yahoo.com/group/Cambridge-Freegle/
15
23
  name: Cambridge-Freegle
16
24
  description:
17
- defunct: false
25
+ not_found: false
26
+ private: false
27
+ age_restricted: false
28
+ founded: Aug 24, 2011
29
+ language: English
30
+ num_members: 160
31
+ category: Recycling
18
32
  post_email: Cambridge-Freegle@yahoogroups.com
19
33
  subscribe_email: Cambridge-Freegle-subscribe@yahoogroups.com
20
34
  owner_email: Cambridge-Freegle-owner@yahoogroups.com
21
35
  unsubscribe_email: Cambridge-Freegle-unsubscribe@yahoogroups.com
22
36
 
37
+ # non-existent group
23
38
  - id: Freecycle_MV
24
39
  url: http://groups.yahoo.com/group/Freecycle_MV/
25
- defunct: true
40
+ name:
41
+ description:
42
+ not_found: true
43
+ private:
44
+ age_restricted:
45
+ founded:
46
+ language:
47
+ num_members:
48
+ category:
49
+ post_email:
50
+ subscribe_email:
51
+ owner_email:
52
+ unsubscribe_email:
26
53
 
54
+ # private group
27
55
  - id: blackpool-freecycle
28
56
  url: http://groups.yahoo.com/group/blackpool-freecycle/
29
- defunct: false
57
+ name: blackpool-freecycle
58
+ description:
59
+ not_found: false
30
60
  private: true
61
+ age_restricted:
62
+ founded:
63
+ language:
64
+ num_members:
65
+ category:
66
+ post_email:
67
+ subscribe_email:
68
+ owner_email:
69
+ unsubscribe_email:
70
+
71
+ # Age restricted group
72
+ - id: NapaCountyCAFreecycle
73
+ url: http://groups.yahoo.com/group/NapaCountyCAFreecycle
74
+ name:
75
+ description:
76
+ not_found: false
77
+ private:
78
+ age_restricted: true
79
+ founded:
80
+ language:
81
+ num_members:
82
+ category:
83
+ post_email:
84
+ subscribe_email:
85
+ owner_email:
86
+ unsubscribe_email:
31
87
 
88
+ # "Related Link:" in email address element
32
89
  - id: Dursleyfreecycle
33
90
  url: http://groups.yahoo.com/group/Dursleyfreecycle/
34
91
  name: DursleyFreecycle
35
92
  description: DursleyFreecycle(R)
36
- defunct: false
93
+ not_found: false
94
+ private: false
95
+ age_restricted: false
96
+ founded: Sep 13, 2009
97
+ language: English
98
+ num_members: 1
99
+ category: Recycling
37
100
  post_email: DursleyFreecycle@yahoogroups.co.uk
38
101
  subscribe_email: DursleyFreecycle-subscribe@yahoogroups.co.uk
39
102
  owner_email: DursleyFreecycle-owner@yahoogroups.co.uk
40
- unsubscribe_email: DursleyFreecycle-unsubscribe@yahoogroups.co.uk
103
+ unsubscribe_email: DursleyFreecycle-unsubscribe@yahoogroups.co.uk
104
+
105
+ # nil Category
106
+ - id: redcar-cleveland-freegle
107
+ url: http://groups.yahoo.com/group/redcar-cleveland-freegle/
108
+ name: redcar-cleveland-freegle
109
+ description: Redcar & Cleveland Freegle
110
+ not_found: false
111
+ private: false
112
+ age_restricted: false
113
+ founded: Mar 30, 2007
114
+ language: English
115
+ num_members: 2985
116
+ category:
117
+ post_email: redcar-cleveland-freegle@yahoogroups.com
118
+ subscribe_email: redcar-cleveland-freegle-subscribe@yahoogroups.com
119
+ owner_email: redcar-cleveland-freegle-owner@yahoogroups.com
120
+ unsubscribe_email: redcar-cleveland-freegle-unsubscribe@yahoogroups.com
121
+
122
+ # Foreign language, particularly for the date format
123
+ - id:
124
+ url: http://de.groups.yahoo.com/group/freecycle-michelstadt/
125
+ name: freecycle-michelstadt
126
+ description: Freecycle Michelstadt / Odenwald
127
+ not_found: false
128
+ private: false
129
+ age_restricted: false
130
+ founded: Mai 6, 2004
131
+ language: Deutsch
132
+ num_members: 63
133
+ category: Kostenlos
134
+ post_email: freecycle-michelstadt@yahoogroups.de
135
+ subscribe_email: freecycle-michelstadt-subscribe@yahoogroups.de
136
+ owner_email: freecycle-michelstadt-owner@yahoogroups.de
137
+ unsubscribe_email: freecycle-michelstadt-unsubscribe@yahoogroups.de
138
+
@@ -0,0 +1,13 @@
1
+ # encoding: utf-8
2
+ def date_str_to_english(date_str)
3
+ date_str.
4
+ gsub(/ene/i, "Jan").
5
+ gsub(/fév|fev/i, "Feb").
6
+ gsub(/mar|mrz/i, "Mar").
7
+ gsub(/avr|abr/i, "Apr").
8
+ gsub(/mai|maj/i, "May").
9
+ gsub(/aoû|ago/i, "Aug").
10
+ gsub(/set/i, "Sep").
11
+ gsub(/okt|out/i, "Oct").
12
+ gsub(/déc|dez|dic/i, "Dec")
13
+ end
@@ -1,6 +1,8 @@
1
1
  require 'test/unit'
2
+ require 'test_helper'
2
3
  require 'webmock/test_unit'
3
4
  require 'yahoo-group-data'
5
+ require 'date'
4
6
 
5
7
  class YahooGroupDataTest < Test::Unit::TestCase
6
8
  def test_initialize_with_invalid_params
@@ -42,15 +44,29 @@ class YahooGroupDataTest < Test::Unit::TestCase
42
44
  to_return(:status => 200, :body => File.read("test/yahoo_pages/#{g_data['id']}.html"), :headers => {})
43
45
 
44
46
  group = YahooGroupData.new(g_data["url"])
45
- unless g_data["defunct"] or g_data["private"]
47
+ if g_data["not_found"]
48
+ assert_equal g_data["not_found"], group.not_found?
49
+ elsif g_data["private"]
50
+ assert_equal g_data["not_found"], group.not_found?
51
+ assert_equal g_data["private"], group.private?
52
+ elsif g_data["age_restricted"]
53
+ assert_equal g_data["not_found"], group.not_found?
54
+ assert_equal g_data["age_restricted"], group.age_restricted?
55
+ else
56
+ assert_equal g_data["age_restricted"], group.age_restricted?
57
+ assert_equal g_data["private"], group.private?
58
+ assert_equal g_data["not_found"], group.not_found?
46
59
  assert_equal g_data["name"], group.name
47
60
  assert_equal g_data["description"], group.description
48
61
  assert_equal g_data["post_email"], group.post_email
49
62
  assert_equal g_data["subscribe_email"], group.subscribe_email
50
63
  assert_equal g_data["owner_email"], group.owner_email
51
64
  assert_equal g_data["unsubscribe_email"], group.unsubscribe_email
65
+ assert_equal Date.parse(date_str_to_english(g_data["founded"])), group.founded
66
+ assert_equal g_data["language"], group.language
67
+ assert_equal g_data["num_members"], group.num_members
68
+ assert_equal g_data["category"], group.category
52
69
  end
53
- assert_equal g_data["defunct"], group.defunct?
54
70
  end
55
71
  end
56
72
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yahoo-group-data
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-28 00:00:00.000000000 Z
12
+ date: 2012-01-29 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &70185747420720 !ruby/object:Gem::Requirement
16
+ requirement: &70145628826300 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70185747420720
24
+ version_requirements: *70145628826300
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: curb
27
- requirement: &70185747420160 !ruby/object:Gem::Requirement
27
+ requirement: &70145628825740 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0.8'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70185747420160
35
+ version_requirements: *70145628825740
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: webmock
38
- requirement: &70185747419720 !ruby/object:Gem::Requirement
38
+ requirement: &70145628825320 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70185747419720
46
+ version_requirements: *70145628825320
47
47
  description: A lib to fetch public Yahoo group data
48
48
  email:
49
49
  - will@willj.net
@@ -59,6 +59,7 @@ files:
59
59
  - lib/yahoo-group-data.rb
60
60
  - lib/yahoo-group-data/version.rb
61
61
  - test/groups.yml
62
+ - test/test_helper.rb
62
63
  - test/test_yahoo_group_data.rb
63
64
  - test/yahoo_pages/.gitkeep
64
65
  - yahoo-group-data.gemspec
@@ -89,5 +90,6 @@ summary: A lib to fetch the publicly available Yahoo group data from a Yahoo gro
89
90
  page
90
91
  test_files:
91
92
  - test/groups.yml
93
+ - test/test_helper.rb
92
94
  - test/test_yahoo_group_data.rb
93
95
  - test/yahoo_pages/.gitkeep