wuclan 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. data/LICENSE.textile +20 -0
  2. data/README.textile +28 -0
  3. data/examples/analyze/strong_links/gen_multi_edge.rb +103 -0
  4. data/examples/analyze/strong_links/main.rb +51 -0
  5. data/examples/analyze/word_count/dump_schema.rb +13 -0
  6. data/examples/analyze/word_count/freq_user.rb +31 -0
  7. data/examples/analyze/word_count/freq_whole_corpus.rb +27 -0
  8. data/examples/analyze/word_count/word_count.pig +43 -0
  9. data/examples/analyze/word_count/word_count.rb +34 -0
  10. data/examples/lastfm/scrape/load_lastfm.rb +31 -0
  11. data/examples/lastfm/scrape/scrape_lastfm.rb +47 -0
  12. data/examples/lastfm/scrape/seed.tsv +147 -0
  13. data/examples/twitter/old/load_twitter_search_jobs.rb +157 -0
  14. data/examples/twitter/old/scrape_twitter_api.rb +104 -0
  15. data/examples/twitter/old/scrape_twitter_search.rb +57 -0
  16. data/examples/twitter/old/scrape_twitter_trending.rb +73 -0
  17. data/examples/twitter/parse/parse_twitter_requests.rb +81 -0
  18. data/examples/twitter/parse/parse_twitter_search_requests.rb +28 -0
  19. data/examples/twitter/scrape_twitter_api/scrape_twitter_api.rb +61 -0
  20. data/examples/twitter/scrape_twitter_api/seed.tsv +4 -0
  21. data/examples/twitter/scrape_twitter_api/start_cache_twitter.sh +2 -0
  22. data/examples/twitter/scrape_twitter_api/support/make_request_stats.rb +291 -0
  23. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb +98 -0
  24. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig +4 -0
  25. data/examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv +6 -0
  26. data/examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv +725 -0
  27. data/examples/twitter/scrape_twitter_hosebird/edamame-killall +4 -0
  28. data/examples/twitter/scrape_twitter_hosebird/foo.rb +19 -0
  29. data/examples/twitter/scrape_twitter_hosebird/ps_emulation.rb +111 -0
  30. data/examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb +110 -0
  31. data/examples/twitter/scrape_twitter_hosebird/test_spewer.rb +20 -0
  32. data/examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml +10 -0
  33. data/examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb +38 -0
  34. data/examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb +63 -0
  35. data/examples/twitter/scrape_twitter_search/scrape_twitter_search.rb +44 -0
  36. data/examples/twitter/scrape_twitter_search/twitter_search_daemons.god +25 -0
  37. data/lib/old/twitter_api.rb +88 -0
  38. data/lib/wuclan/delicious/delicious_html_request.rb +31 -0
  39. data/lib/wuclan/delicious/delicious_models.rb +26 -0
  40. data/lib/wuclan/delicious/delicious_request.rb +65 -0
  41. data/lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb +60 -0
  42. data/lib/wuclan/friendster.rb +7 -0
  43. data/lib/wuclan/lastfm/model/base.rb +49 -0
  44. data/lib/wuclan/lastfm/model/sample_responses.txt +16 -0
  45. data/lib/wuclan/lastfm/scrape/base.rb +195 -0
  46. data/lib/wuclan/lastfm/scrape/concrete.rb +143 -0
  47. data/lib/wuclan/lastfm/scrape/lastfm_job.rb +12 -0
  48. data/lib/wuclan/lastfm/scrape/lastfm_request_stream.rb +17 -0
  49. data/lib/wuclan/lastfm/scrape/recursive_requests.rb +154 -0
  50. data/lib/wuclan/lastfm/scrape.rb +12 -0
  51. data/lib/wuclan/lastfm.rb +7 -0
  52. data/lib/wuclan/metrics/user_graph_metrics.rb +99 -0
  53. data/lib/wuclan/metrics/user_metrics.rb +443 -0
  54. data/lib/wuclan/metrics/user_metrics_basic.rb +277 -0
  55. data/lib/wuclan/metrics/user_scraping_metrics.rb +64 -0
  56. data/lib/wuclan/metrics.rb +0 -0
  57. data/lib/wuclan/myspace.rb +21 -0
  58. data/lib/wuclan/open_social/model/base.rb +0 -0
  59. data/lib/wuclan/open_social/scrape/base.rb +111 -0
  60. data/lib/wuclan/open_social/scrape_request.rb +6 -0
  61. data/lib/wuclan/open_social.rb +0 -0
  62. data/lib/wuclan/rdf_output/relationship_rdf.rb +47 -0
  63. data/lib/wuclan/rdf_output/text_element_rdf.rb +64 -0
  64. data/lib/wuclan/rdf_output/tweet_rdf.rb +10 -0
  65. data/lib/wuclan/rdf_output/twitter_rdf.rb +84 -0
  66. data/lib/wuclan/rdf_output/twitter_user_rdf.rb +12 -0
  67. data/lib/wuclan/shorturl/shorturl_request.rb +271 -0
  68. data/lib/wuclan/twitter/api_response_examples.textile +300 -0
  69. data/lib/wuclan/twitter/model/base.rb +72 -0
  70. data/lib/wuclan/twitter/model/multi_edge.rb +31 -0
  71. data/lib/wuclan/twitter/model/relationship.rb +176 -0
  72. data/lib/wuclan/twitter/model/text_element/extract_info_tests.rb +83 -0
  73. data/lib/wuclan/twitter/model/text_element/grok_tweets.rb +96 -0
  74. data/lib/wuclan/twitter/model/text_element/more_regexes.rb +370 -0
  75. data/lib/wuclan/twitter/model/text_element.rb +38 -0
  76. data/lib/wuclan/twitter/model/tweet/tokenize.rb +38 -0
  77. data/lib/wuclan/twitter/model/tweet/tweet_regexes.rb +202 -0
  78. data/lib/wuclan/twitter/model/tweet/tweet_token.rb +79 -0
  79. data/lib/wuclan/twitter/model/tweet.rb +74 -0
  80. data/lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb +57 -0
  81. data/lib/wuclan/twitter/model/twitter_user.rb +145 -0
  82. data/lib/wuclan/twitter/model.rb +21 -0
  83. data/lib/wuclan/twitter/parse/ff_ids_parser.rb +27 -0
  84. data/lib/wuclan/twitter/parse/friends_followers_parser.rb +52 -0
  85. data/lib/wuclan/twitter/parse/generic_json_parser.rb +26 -0
  86. data/lib/wuclan/twitter/parse/json_tweet.rb +63 -0
  87. data/lib/wuclan/twitter/parse/json_twitter_user.rb +122 -0
  88. data/lib/wuclan/twitter/parse/public_timeline_parser.rb +54 -0
  89. data/lib/wuclan/twitter/parse/twitter_search_parse.rb +60 -0
  90. data/lib/wuclan/twitter/parse/user_parser.rb +30 -0
  91. data/lib/wuclan/twitter/scrape/base.rb +97 -0
  92. data/lib/wuclan/twitter/scrape/old_skool_request_classes.rb +40 -0
  93. data/lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb +31 -0
  94. data/lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb +75 -0
  95. data/lib/wuclan/twitter/scrape/twitter_followers_request.rb +135 -0
  96. data/lib/wuclan/twitter/scrape/twitter_json_response.rb +124 -0
  97. data/lib/wuclan/twitter/scrape/twitter_request_stream.rb +44 -0
  98. data/lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb +44 -0
  99. data/lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb +30 -0
  100. data/lib/wuclan/twitter/scrape/twitter_search_job.rb +25 -0
  101. data/lib/wuclan/twitter/scrape/twitter_search_request.rb +70 -0
  102. data/lib/wuclan/twitter/scrape/twitter_search_request_stream.rb +19 -0
  103. data/lib/wuclan/twitter/scrape/twitter_timeline_request.rb +72 -0
  104. data/lib/wuclan/twitter/scrape/twitter_user_request.rb +64 -0
  105. data/lib/wuclan/twitter/scrape.rb +27 -0
  106. data/lib/wuclan/twitter.rb +7 -0
  107. data/lib/wuclan.rb +1 -0
  108. data/spec/spec_helper.rb +9 -0
  109. data/spec/wuclan_spec.rb +7 -0
  110. data/wuclan.gemspec +184 -0
  111. metadata +219 -0
@@ -0,0 +1,72 @@
1
+ require 'date'
2
+ module Wuclan::Twitter::Model
3
+ module ModelCommon
4
+
5
+ #
6
+ # By default, take the front num_key_fields of the flattened struct
7
+ #
8
def key
  # Joins the leading +num_key_fields+ values of the flattened struct (+to_a+)
  # with "-" to form the record's key.
  #
  # +first(n)+ is used instead of the range 0..(n-1): with num_key_fields == 0
  # that range turns into 0..-1, which selects every field rather than none.
  to_a.first(num_key_fields).join("-")
end
11
+
12
+ # ===========================================================================
13
+ #
14
+ # Metrics
15
+ #
16
def scrape_age
  # Time elapsed between now and +scraped_at+, returned as a Float.
  # NOTE(review): presumably in days, since subtracting DateTimes yields days;
  # this relies on DateTime.parse_safely returning a DateTime -- confirm.
  current = DateTime.now
  scraped = DateTime.parse_safely(scraped_at)
  (current - scraped).to_f
end
19
+
20
def days_since_created
  # Time elapsed between now and +created_at+, returned as a Float.
  # NOTE(review): presumably in days, since subtracting DateTimes yields days;
  # this relies on DateTime.parse_safely returning a DateTime -- confirm.
  current = DateTime.now
  created = DateTime.parse_safely(created_at)
  (current - created).to_f
end
23
+
24
+ # ===========================================================================
25
+ #
26
+ # Field conversions
27
+ #
28
+ # Make the data easier for batch flat-record processing
29
+ #
30
+
31
+ #
32
+ # Convert date into flat, uniform format
33
+ # This method is idempotent: repeated calls give same result.
34
+ #
35
def self.flatten_date dt
  # A value that already contains a run of 14 digits is treated as flat and
  # handed back untouched; any other non-nil value is parsed and converted
  # with +to_flat+. A nil +dt+ yields nil.
  if dt =~ /\d{14}/
    dt
  elsif dt
    DateTime.parse(dt).to_flat
  end
end
39
+
40
+ #
41
+ # Zero-pad IDs to a full 10 digits (the max digits for an unsigned 32-bit
42
+ # integer).
43
+ #
44
+ # nil id will be encoded as 0. Shit happens and we'd rather be idempotent
45
+ # than picky.
46
+ #
47
+ # Note that sometime in 2010 (or sooner, depending on its growth rate: in 2008
48
+ # Dec it was 1.8M/day) the status_id will exceed 32 bits. Something will
49
+ # happen then.
50
+ # This method is idempotent: repeated calls give same result.
51
+ #
52
def self.zeropad_id id
  # nil/false collapse to 0 before conversion; everything else goes through
  # +to_i+. The result is rendered as a decimal padded with zeros to at least
  # 10 characters.
  numeric = (id || 0).to_i
  format('%010d', numeric)
end
56
+
57
+ #
58
+ # Express boolean as 1 (true) or 0 (false). In contravention of typical ruby
59
+ # semantics (but in a way that is more robust for wukong-like batch
60
+ # processing), the number 0, the string '0', nil and false are all considered
61
+ # false. (This also makes the method idempotent: repeated calls give same result.)
62
+ #
63
def self.unbooleanize bool
  # The values counted as false are matched with ===, exactly as a
  # case/when would; anything else is counted as true.
  counts_as_false = [0, '0', false, nil].any? { |falsy| falsy === bool }
  counts_as_false ? 0 : 1
end
70
+ end
71
+ end
72
+
@@ -0,0 +1,31 @@
1
module Wuclan
  module Models
    # Struct with two Integer-typed fields, +src+ and +dest+.
    class Edge < TypedStruct.new(
        [:src,            Integer],
        [:dest,           Integer]
        )
    end

    # Struct holding a +src+/+dest+ pair plus one Integer field per direction
    # for each relationship type (follows, replies, atsigns, retweets,
    # favorites).
    class MultiEdge < TypedStruct.new(
        [:src,            Integer],
        [:dest,           Integer],
        [:a_follows_b,    Integer],
        [:b_follows_a,    Integer],
        [:a_replies_b,    Integer],
        [:b_replies_a,    Integer],
        [:a_atsigns_b,    Integer],
        [:b_atsigns_a,    Integer],
        [:a_retweets_b,   Integer],
        [:b_retweets_a,   Integer],
        [:a_favorites_b,  Integer],
        [:b_favorites_a,  Integer]
        )

      # True when the follow relationship is symmetric, i.e. both
      # +a_follows_b+ and +b_follows_a+ are positive.
      #
      # The values are compared numerically because 0 is truthy in Ruby, so a
      # bare `a_follows_b && b_follows_a` would accept zero counts.
      #
      # NOTE(review): the previous expression ended in `&& ()`, which always
      # evaluates to nil; further criteria were evidently planned but never
      # written. Only the mutual-follow test is implemented here.
      def strong?
        [a_follows_b, b_follows_a].all? { |count| count.to_i > 0 }
      end
    end

  end
end
@@ -0,0 +1,176 @@
1
+ module Wuclan::Twitter::Model
2
+ # features common to all user-user relationships.
3
module RelationshipBase
  # Class-level helpers; attached to each including class by +included+ below.
  module ClassMethods
    # The class's +resource_name+ with the "a_..._b" wrapper replaced by its
    # inner part (for example "a_follows_b" becomes "follows"). The result is
    # cached in @rel_name after the first call.
    def rel_name
      return @rel_name if @rel_name
      @rel_name = resource_name.to_s.gsub(/a_(.*)_b/, '\1')
    end
  end

  # Hook run when the module is included: extends the including class with
  # ClassMethods.
  def self.included base
    base.extend ClassMethods
  end
end
14
+
15
+ # Follower/Friend relationship
16
class AFollowsB < TypedStruct.new([:user_a_id, Integer], [:user_b_id, Integer])
  include ModelCommon
  include RelationshipBase

  # The key is the user-user pair: the first two fields.
  def num_key_fields
    2
  end

  # Fields of this struct that hold numeric ids.
  def numeric_id_fields
    [:user_a_id, :user_b_id]
  end
end
26
+
27
+ # User ==favorites_tweet=> tweet ==by_user=>b
28
class AFavoritesB < TypedStruct.new(
    [:user_a_id, Integer], [:user_b_id, Integer], [:status_id, Integer])
  include ModelCommon
  include RelationshipBase

  # The key is user_a-user_b-status_id (user_a-status_id alone would already
  # be enough).
  def num_key_fields
    3
  end

  # Fields of this struct that hold numeric ids.
  def numeric_id_fields
    [:user_a_id, :user_b_id, :status_id]
  end
end
39
+
40
+ # Direct (threaded) replies: occur at the start of a tweet.
41
class ARepliesB < TypedStruct.new(
    [:user_a_id, Integer], [:user_b_id, Integer],
    [:status_id, Integer], [:in_reply_to_status_id, Integer])
  include ModelCommon
  include RelationshipBase

  # The key is user_a-user_b-status_id: the first three fields.
  def num_key_fields
    3
  end

  # Fields of this struct that hold numeric ids.
  def numeric_id_fields
    [:user_a_id, :user_b_id, :status_id, :in_reply_to_status_id]
  end
end
53
+
54
+ # Direct (threaded) replies: occur at the start of a tweet.
55
class ARepliesBName < TypedStruct.new(
    # Names are text, so they are typed String (as user_b_name is in
    # AAtsignsB) rather than Integer.
    [:user_a_name,           String],
    [:user_b_name,           String],
    [:status_id,             Integer],
    [:in_reply_to_status_id, Integer],
    [:user_a_sid,            Integer],
    [:user_b_sid,            Integer]
    )
  include ModelCommon
  include RelationshipBase
  # Key on user_a_name-user_b_name-status_id (the first three fields)
  def num_key_fields() 3 end
  # Only members of this struct are listed: it has no user_a_id/user_b_id.
  # NOTE(review): user_a_sid/user_b_sid are Integer-typed and assumed to be
  # numeric ids -- confirm against whatever consumes numeric_id_fields.
  def numeric_id_fields() [:status_id, :in_reply_to_status_id, :user_a_sid, :user_b_sid] ; end
end
69
+
70
+ # Atsign mentions anywhere in the tweet
71
+ # note we have no user_b_id for @foo
72
class AAtsignsB < TypedStruct.new(
    [:user_a_id, Integer], [:user_b_name, String], [:status_id, Integer])
  include ModelCommon
  include RelationshipBase

  # The key is user_a_id-user_b_name-status_id: the first three fields.
  def num_key_fields
    3
  end

  # user_b is only known by name here, so just two fields hold numeric ids.
  def numeric_id_fields
    [:user_a_id, :status_id]
  end
end
83
+
84
+ # Atsign mentions anywhere in the tweet
85
+ # here user_b is identified by numeric user_b_id rather than by @name
86
class AAtsignsBId < TypedStruct.new(
    [:user_a_id, Integer], [:user_b_id, Integer], [:status_id, Integer])
  include ModelCommon
  include RelationshipBase

  # The key is user_a_id-user_b_id-status_id: the first three fields.
  def num_key_fields
    3
  end

  # Fields of this struct that hold numeric ids.
  def numeric_id_fields
    [:user_a_id, :user_b_id, :status_id]
  end
end
97
+
98
+
99
+ #
100
+ # A re-tweet is /sent/ by user_a, repeating an earlier message by user_b
101
+ # Any tweet containing text roughly similar to
102
+ # RT @user <stuff>
103
+ # with equivalently for RT: retweet, via, retweeting
104
+ #
105
+ # !!! OR !!!
106
+ #
107
+ # A retweet whore request, something like
108
+ # pls RT Hey lookit me
109
+ #
110
+ # We just pass along both in the same data structure; the heuristic is poor
111
+ # enough that we leave it to later steps to be clever. (Note retweets and
112
+ # retweet-whore requests that are not retweets have user_b_name set and unset respectively.)
113
+ #
114
+ # +user_a_id:+ the user who sent the re-tweet
115
+ # +status_id:+ the id of the tweet *containing* the re-tweet (for the ID of the original tweet you're on your own.)
116
+ # +user_b_name:+ the user cited as originating: RT @user_b_name
117
+ # +please_flag:+ a 1 if the text contains 'please' or 'plz' as a stand-alone word
118
+ # +text:+ the *full* text of the tweet
119
+ #
120
class ARetweetsB < TypedStruct.new(
    [:user_a_id,      Integer],
    [:user_b_name,    String],
    [:status_id,      Integer],
    [:please_flag,    Integer],
    [:text,           String]
    )
  include ModelCommon
  include RelationshipBase

  # Builds the struct, then normalizes +please_flag+ to 0/1 with
  # ModelCommon.unbooleanize, so any value other than 0, '0', false or nil
  # is stored as 1.
  def initialize *args
    # Explicit parentheses avoid Ruby's "`*' interpreted as argument prefix"
    # ambiguity warning for `super *args`.
    super(*args)
    self.please_flag = ModelCommon.unbooleanize(self.please_flag)
  end

  # Key on retweeting_user-user-tweet_id
  def num_key_fields() 3 end
  def numeric_id_fields() [:user_a_id, :status_id] ; end

  #
  # If a user is cited (user_b_name is present) we assume this is a
  # retweet; with no cited user it is taken to be an rtwhore request.
  #
  def is_retweet?
    ! user_b_name.blank?
  end
end
145
+
146
class ARetweetsBId < TypedStruct.new(
    [:user_a_id,      Integer],
    [:user_b_id,      Integer],
    [:status_id,      Integer],
    [:please_flag,    Integer],
    [:text,           String]
    )
  include ModelCommon
  include RelationshipBase

  # Builds the struct, then normalizes +please_flag+ to 0/1 with
  # ModelCommon.unbooleanize, so any value other than 0, '0', false or nil
  # is stored as 1.
  def initialize *args
    # Explicit parentheses avoid Ruby's "`*' interpreted as argument prefix"
    # ambiguity warning for `super *args`.
    super(*args)
    self.please_flag = ModelCommon.unbooleanize(self.please_flag)
  end

  # Key on retweeting_user-user-tweet_id
  def num_key_fields() 3 end
  def numeric_id_fields() [:user_a_id, :user_b_id, :status_id] ; end

  #
  # If a user is cited (user_b_id is present and non-zero) we assume this is
  # a retweet; with no cited user it is taken to be an rtwhore request.
  #
  # This struct has no user_b_name member, so the check reads user_b_id.
  # NOTE(review): an id of 0 is treated as "no user" on the assumption that
  # missing ids may be encoded as 0 -- confirm against how records are loaded.
  #
  def is_retweet?
    ! (user_b_id.blank? || user_b_id.to_i == 0)
  end
end
173
+
174
+
175
+
176
+ end
@@ -0,0 +1,83 @@
1
+
2
+ # ===========================================================================
3
+ #
4
+ # tests = [
5
+ # 'http://foo.us/',
6
+ # 'http://foo.us/a',
7
+ # 'http://foo.us/a?q=a',
8
+ # 'http://foo.us/a#a',
9
+ # 'http://foo.us/a?q=a&b=3#a',
10
+ # 'http://foo.us/a;2/~m-_.\':%+@,;?q=a&b=3#a',
11
+ # 'http://foo.us/a?q=a?',
12
+ # 'http://foo.us/=a#a',
13
+ # 'http://foo.us/a&?q=a&b=3#a',
14
+ # 'http://foo.us/a;2/~m-_.\':%+@,;?q=a&b=3#a&',
15
+ # ]
16
+ # tests.each do |test_str|
17
+ # p test_str.scan(RE_URL)
18
+ # end
19
+ #
20
+ # atsign_tests = [
21
+ # '@foo hello',
22
+ # ' @foo @hello ',
23
+ # ' @foo, @hello ',
24
+ # '-@foo,@hello',
25
+ # '@foo@bar ',
26
+ # 'a basdf@foo b',
27
+ # 'http://@foo',
28
+ # 'foo@bar @bar@foo @zz+',
29
+ # ].each do |test_str|
30
+ # p test_str.scan(RE_ATSIGNS)
31
+ # end
32
+ #
33
+ # hash_tag_tests = [
34
+ # '#downtown',
35
+ # '#downtown?',
36
+ # '#downtown.',
37
+ # '#downtown]',
38
+ # '#downtown}',
39
+ # '#downtown)',
40
+ # '#downtown,',
41
+ # '#downtown;',
42
+ # '#downtown\'',
43
+ # '#downtown\'s',
44
+ # '#downtown_',
45
+ # '#down+town',
46
+ # '#down_town',
47
+ # '#down-town',
48
+ # '#www.downtown.com',
49
+ # '#www.downtown.com.',
50
+ # '##',
51
+ # '#.',
52
+ # '#taxonomy:binomial=Alcedo_atthis',
53
+ # '#geo:lat=52.478342',
54
+ # '#geo:lon=53.609130',
55
+ # 'a#www.downtown.com.',
56
+ # ' #www.downtown.com.',
57
+ # ' =#www.downtown.com.!',
58
+ # ].each do |test_str|
59
+ # p test_str.scan(RE_HASHTAGS)
60
+ # end
61
+ #
62
+ #
63
+ # # #downtown
64
+ # # #downtown
65
+ # # #downtown
66
+ # # #downtown
67
+ # # #downtown
68
+ # # #downtown
69
+ # # #downtown
70
+ # # #downtown
71
+ # # #downtown
72
+ # # #downtown
73
+ # # #downtown_
74
+ # # #down+town
75
+ # # #down_town
76
+ # # #down-town
77
+ # # #www.downtown.com
78
+ # # #www.downtown.com
79
+ # #
80
+ # #
81
+ # # #taxonomy:binomial=Alcedo_atthis
82
+ # # #geo:lat=52.478342
83
+ # # #geo:lon=53.609130
@@ -0,0 +1,96 @@
1
+ require 'wuclan/grok/tweet_regexes'
2
+ require 'wuclan/models'
3
+ require 'wukong/encoding'
4
+ include Wuclan::Grok::TweetRegexes
5
+ include Wuclan::Models
6
+
7
+ Tweet.class_eval do
8
+
9
+ #
10
+ #
11
+ #
12
def tweet_len
  # Length of the tweet's decoded text.
  decoded = decoded_text
  decoded.size
end
15
+
16
+ #
17
+ # A direct (threaded) reply: when the tweet has a non-blank, non-zero
18
+ # in_reply_to_user_id this gives the ARepliesB for it; otherwise nil.
19
+ #
20
def replies
  # Returns an ARepliesB for this tweet, or nil when in_reply_to_user_id is
  # blank or converts to 0.
  replied_to = in_reply_to_user_id
  return if replied_to.blank? || replied_to.to_i == 0
  ARepliesB.new(twitter_user_id, replied_to, self.id, in_reply_to_status_id)
end
25
+
26
+ #
27
+ # Any mention of another user, whether at the beginning of a line (and thus
28
+ # *also* an ARepliesB), a retweet, or just somewhere in the body of the text
29
+ #
30
def atsigns
  # One AAtsignsB per RE_ATSIGNS match in the decoded text; the first capture
  # of each match, passed through wukong_encode, is used as user_b_name.
  decoded_text.scan(RE_ATSIGNS).map do |captures|
    mentioned_name = captures.first.wukong_encode
    AAtsignsB.new(twitter_user_id, mentioned_name, self.id)
  end
end
37
+
38
+ #
39
+ # Remember that a retweet could be an actual retweet, a retweet whore request,
40
+ # or a retweet of a retweet whore request.
41
+ #
42
+ # Or, it could have just fooled us.
43
+ #
44
+ # Anyway you can take it from here.
45
+ #
46
def retweets
  # Returns an ARetweetsB when the decoded text matches RE_RTWHORE and/or
  # RE_RETWEET, and nil when it matches neither. user_b_name is taken from
  # the first RE_RETWEET capture and stays nil if only RE_RTWHORE matched.
  # The RE_RTWHORE match result itself is passed along as please_flag.
  please_flag   = RE_RTWHORE.match(decoded_text)
  retweet_match = RE_RETWEET.match(decoded_text)
  return if please_flag.nil? && retweet_match.nil?
  user_b_name = retweet_match ? retweet_match.captures.first.wukong_encode : nil
  ARetweetsB.new(twitter_user_id, user_b_name, self.id, please_flag, self.text)
end
53
+
54
+ #
55
+ # Hashtags indicate a topic: #hashtag
56
+ #
57
def hashtags
  # One Hashtag per RE_HASHTAGS match in the decoded text; the first capture
  # of each match is passed through wukong_encode.
  decoded_text.scan(RE_HASHTAGS).map do |captures|
    tag_text = captures.first.wukong_encode
    Hashtag.new(tag_text, self.id, twitter_user_id)
  end
end
64
+
65
+ #
66
+ # URLs within a tweet.
67
+ # can be multiple per tweet.
68
+ #
69
+ # Uses a regexp more selective than all canonically allowed - see
70
+ # tweet_regexes
71
+ #
72
def tweet_urls
  # One TweetUrl per RE_URL match in the decoded text; the first capture of
  # each match is passed through wukong_encode.
  decoded_text.scan(RE_URL).map do |captures|
    url_text = captures.first.wukong_encode
    TweetUrl.new(url_text, self.id, twitter_user_id)
  end
end
79
+
80
def text_elements
  # Runs each extractor in order (replies, atsigns, retweets, hashtags,
  # tweet_urls), drops the ones that returned nil, and flattens the rest
  # into a single array. Tweet length and words are not extracted here.
  extractors = [:replies, :atsigns, :retweets, :hashtags, :tweet_urls]
  extracted  = extractors.map { |extractor| send(extractor) }
  extracted.compact.flatten
end
95
+
96
+ end