curate_tumblr 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. data/.project +18 -0
  2. data/README.md +219 -0
  3. data/Rakefile +0 -0
  4. data/curate_tumblr.gemspec +12 -0
  5. data/example/kubricklove/kubricklove_config.yaml +20 -0
  6. data/example/kubricklove/links/kubricklove_links +4 -0
  7. data/example/kubricklove_follow.rb +6 -0
  8. data/example/kubricklove_reblog.rb +6 -0
  9. data/example/readme +9 -0
  10. data/lib/curate_tumblr.rb +25 -0
  11. data/lib/curate_tumblr/curator.rb +200 -0
  12. data/lib/curate_tumblr/publish/follow.rb +62 -0
  13. data/lib/curate_tumblr/publish/post.rb +21 -0
  14. data/lib/curate_tumblr/publish/reblog.rb +86 -0
  15. data/lib/curate_tumblr/render/render_follow.rb +29 -0
  16. data/lib/curate_tumblr/render/render_links.rb +132 -0
  17. data/lib/curate_tumblr/render/render_reblog.rb +36 -0
  18. data/lib/curate_tumblr/tumblr/client.rb +347 -0
  19. data/lib/curate_tumblr/tumblr/extract_links.rb +190 -0
  20. data/lib/curate_tumblr/tumblr/infos.rb +102 -0
  21. data/lib/curate_tumblr/utilities/monkey.rb +5 -0
  22. data/lib/curate_tumblr/utilities/utilities.rb +4 -0
  23. data/lib/curate_tumblr/utilities/utilities_client.rb +103 -0
  24. data/lib/curate_tumblr/utilities/utilities_file.rb +50 -0
  25. data/lib/curate_tumblr/utilities/utilities_format.rb +91 -0
  26. data/lib/curate_tumblr/utilities/utilities_validate.rb +54 -0
  27. data/lib/curate_tumblr/values.rb +22 -0
  28. data/spec/curate_tumblr/curator_spec.rb +36 -0
  29. data/spec/curate_tumblr/publish/follow_spec.rb +183 -0
  30. data/spec/curate_tumblr/publish/post_spec.rb +45 -0
  31. data/spec/curate_tumblr/publish/reblog_spec.rb +118 -0
  32. data/spec/curate_tumblr/render/render_follow_spec.rb +36 -0
  33. data/spec/curate_tumblr/render/render_reblog_spec.rb +73 -0
  34. data/spec/curate_tumblr/tumblr/client_spec.rb +69 -0
  35. data/spec/curate_tumblr/tumblr/extract_links_spec.rb +204 -0
  36. data/spec/curate_tumblr/utilities/utilities_validate_spec.rb +27 -0
  37. data/spec/factories.rb +24 -0
  38. data/spec/shared_examples.rb +2 -0
  39. data/spec/shared_values.rb +203 -0
  40. data/spec/spec_helper.rb +95 -0
  41. metadata +116 -0
@@ -0,0 +1,190 @@
1
require 'set'

module CurateTumblr
  module Tumblr
    # Helpers that recognise Tumblr urls and pull blog urls, post ids, reblog
    # keys and external links out of urls and captions.
    #
    # The singleton methods are string helpers. The instance methods are meant
    # to be mixed into an object that provides @tumblr_name, get_hash_post and
    # add_tofollow_tumblr_links_from_caption (none of them is defined here).
    module ExtractLinks
      attr_reader :all_external_links, :links_tofollow

      # All patterns below are regex *sources*; they are interpolated into a
      # Regexp at the point of use.
      REGEX_TUMBLR_URL = ".*.tumblr.com"
      REGEX_TUMBLR_POST_SLUG_URL = "post/.*/"
      REGEX_TUMBLR_POST_URL = "post/.*"
      REGEX_TUMBLR_REBLOG_URL = "reblog/.*/"
      REGEX_TUMBLR_URL_REBLOG_URL_SPEC = "http%3A%2F%2F.*.tumblr.com"
      REGEX_TUMBLR_URL_REBLOG_URL = "http://.*.tumblr.com"
      REGEX_TUMBLR_GLOBALKEY_REBLOG_URL = "reblog/.*"
      REGEX_TUMBLR_KEY_REDIRECT_REBLOG_URL = "/.*\\?"
      REGEX_TUMBLR_KEY_REBLOG_URL = "/.*"
      # REGEX_EXTRACT_TUMBLR = "href=.*.tumblr.com"
      REGEXS_EXTRACTS_TUMBLR = [ "\".*.tumblr.com\"", "\".*.tumblr.com/\"", "\".*.tumblr.com/post", ".*.tumblr.com/post" ]
      #REGEX_EXTERNALS_FLICKR = [ "\".*flickr.com/photos/.*\"" ]
      REGEX_EXTERNALS_LINKS = [ "href=\".*.com.*\""]

      class << self
        # Returns the blog part of +url+ (first REGEX_TUMBLR_URL match) after
        # passing it through CurateTumblr.format_tumblr_url!, or false when
        # +url+ is not a tumblr url.
        def get_tumblr_url( url )
          return false unless tumblr_url?( url )
          tumblr_url = url[/#{REGEX_TUMBLR_URL}/]
          CurateTumblr.format_tumblr_url!( tumblr_url )
          tumblr_url
        end

        # Returns the blog url embedded in a reblog url (url-encoded form is
        # tried first, then the plain http:// form), formatted in place by
        # CurateTumblr.format_tumblr_url!. Returns false for non reblog urls.
        def get_tumblr_from_reblog_url( reblog_url )
          return false unless tumblr_reblog_url?( reblog_url )
          tumblr_url = reblog_url[/#{REGEX_TUMBLR_URL_REBLOG_URL_SPEC}/] ||
                       reblog_url[/#{REGEX_TUMBLR_URL_REBLOG_URL}/] ||
                       ''.dup # mutable: the formatter works in place
          CurateTumblr.format_tumblr_url!( tumblr_url )
          tumblr_url
        end

        # Returns the post id found after "post/" in +url+ (as formatted by
        # CurateTumblr.format_post_id), or false when +url+ is not a post url.
        def get_post_id_from_post_url( url )
          return false unless tumblr_post_url?( url )
          post_path = url[/#{REGEX_TUMBLR_POST_SLUG_URL}/] || url[/#{REGEX_TUMBLR_POST_URL}/]
          return false unless post_path
          CurateTumblr.format_post_id( post_path.gsub( 'post/', '' ) )
        end

        # Returns the post id found after "reblog/" in +url+ (as formatted by
        # CurateTumblr.format_post_id), or false when +url+ is not a reblog url.
        def get_post_id_from_reblog_url( url )
          return false unless tumblr_reblog_url?( url )
          CurateTumblr.format_post_id( url[/#{REGEX_TUMBLR_REBLOG_URL}/].gsub( 'reblog/', '' ) )
        end

        # Returns the reblog key of a reblog url (as formatted by
        # CurateTumblr.format_post_reblog_key), or false when it cannot be found.
        # The variant ending with "?" (redirect parameters) is tried first.
        def get_reblog_key_from_reblog_url( url )
          return false unless tumblr_reblog_url?( url )
          global_key = url[/#{REGEX_TUMBLR_GLOBALKEY_REBLOG_URL}/].gsub( 'reblog/', '' )
          return false if global_key.empty?
          raw_key = global_key[/#{REGEX_TUMBLR_KEY_REDIRECT_REBLOG_URL}/] ||
                    global_key[/#{REGEX_TUMBLR_KEY_REBLOG_URL}/]
          return false unless raw_key
          CurateTumblr.format_post_reblog_key( raw_key )
        end

        # True when +url+ matches REGEX_TUMBLR_URL anywhere.
        def simple_tumblr_url?( url )
          !( /#{REGEX_TUMBLR_URL}/ =~ url ).nil?
        end

        # True when +url+ matches REGEX_TUMBLR_URL and carries no anchor markup
        # ("<a " or "</a>").
        def tumblr_url?( url )
          simple_tumblr_url?( url ) && !url.include?( "<a " ) && !url.include?( "</a>" )
        end

        # True when +url+ is a tumblr url containing a "post/" segment.
        def tumblr_post_url?( url )
          return false unless tumblr_url?( url )
          [ REGEX_TUMBLR_POST_SLUG_URL, REGEX_TUMBLR_POST_URL ].any? { |pattern| /#{pattern}/ =~ url }
        end

        # True when +url+ is a tumblr url containing a "reblog/<something>/" segment.
        def tumblr_reblog_url?( url )
          return false unless tumblr_url?( url )
          !( /#{REGEX_TUMBLR_REBLOG_URL}/ =~ url ).nil?
        end

        # True only when +tumblr_url+ matches every pattern of
        # REGEXS_EXTRACTS_TUMBLR, false otherwise (always a real boolean).
        # NOTE(review): requiring *all* patterns is what the code has always
        # done, but "any" may have been the intent - confirm with callers.
        def valid_tumblr_url?( tumblr_url )
          REGEXS_EXTRACTS_TUMBLR.all? { |regex| /#{regex}/ =~ tumblr_url }
        end

        # Concatenates, for every pattern of +ar_regexs+, the tumblr links found
        # in +caption+.
        def get_tumblr_links_from_regexs_caption( ar_regexs, caption )
          ar_regexs.flat_map { |regex| get_tumblr_link_from_regex_caption( regex, caption ) }
        end

        # Returns the matches of +regex+ in +caption+ passed through
        # CurateTumblr.get_format_ar_tumblrs_urls, or [] when nothing matches.
        # NOTE(review): get_tumblr_urls_from_text calls get_format_ar_tumblr_urls
        # (no "s" after tumblr); verify both helpers exist.
        def get_tumblr_link_from_regex_caption( regex, caption )
          return [] unless /#{regex}/ =~ caption
          CurateTumblr.get_format_ar_tumblrs_urls( caption.scan( /#{regex}/ ) )
        end

        # Concatenates, for every pattern of +ar_regexs+, the links found in
        # +caption+.
        def get_links_from_regexs_caption( ar_regexs, caption )
          ar_regexs.flat_map { |regex| get_link_from_regex_caption( regex, caption ) }
        end

        # Returns the matches of +regex+ in +caption+ passed through
        # CurateTumblr.get_format_ar_urls, or [] when nothing matches.
        def get_link_from_regex_caption( regex, caption )
          return [] unless /#{regex}/ =~ caption
          CurateTumblr.get_format_ar_urls( caption.scan( /#{regex}/ ) )
        end

        # Keeps only the urls accepted by tumblr_url?. Always returns an Array.
        def get_urls_only_tumblr( ar_urls )
          ar_urls.to_a.select { |url| tumblr_url?( url ) }
        end

        # Keeps only the urls rejected by simple_tumblr_url?. Always returns an Array.
        def get_urls_not_tumblr( ar_urls )
          ar_urls.to_a.reject { |url| simple_tumblr_url?( url ) }
        end

        # Returns the distinct tumblr urls found in +text+.
        # Raises RuntimeError when +text+ is not a String.
        def get_tumblr_urls_from_text( text )
          raise "text #{text.class} is not a String" unless text.is_a? String
          ar_urls = get_tumblr_links_from_regexs_caption( REGEXS_EXTRACTS_TUMBLR, text )
          ar_urls = get_urls_only_tumblr( ar_urls )
          ar_urls = CurateTumblr.get_format_ar_tumblr_urls( ar_urls )
          Set.new( ar_urls ).to_a
        end

        # Returns the distinct non-tumblr links found in +text+.
        # Raises RuntimeError when +text+ is not a String.
        def get_external_urls_from_text( text )
          raise "text #{text.class} is not a String" unless text.is_a? String
          # Work on a copy: re-tagging the encoding in place would alter the
          # caller's string and fail on frozen input.
          utf8_text = text.dup.force_encoding( 'utf-8' )
          ar_urls = get_links_from_regexs_caption( REGEX_EXTERNALS_LINKS, utf8_text )
          ar_urls = get_urls_not_tumblr( ar_urls )
          ar_urls = CurateTumblr.get_format_ar_urls( ar_urls )
          Set.new( ar_urls ).to_a
        end
      end

      # --- end of class methods ---

      # Resets the collected external links. +hash_config+ is accepted but not
      # read here.
      def init_extract_links!( hash_config={} )
        @all_external_links = Set.new
      end

      # Adds the external links found in +caption+ to the collected set and
      # returns the new size of the set.
      def add_external_links_from_caption( caption )
        links = CurateTumblr::Tumblr::ExtractLinks.get_external_urls_from_text( caption )
        @all_external_links += links # Set#+ accepts any enumerable and de-duplicates
        @all_external_links.count
      end

      # Formats +link+ in place with CurateTumblr.format_url!, adds it to the
      # collected set and returns the new size of the set.
      def add_external_link( link )
        CurateTumblr.format_url!( link )
        @all_external_links << link
        @all_external_links.count
      end

      # Extracts to-follow tumblr links and external links from the "caption"
      # of +hash_post+. Returns false when the post has no caption, true otherwise.
      def extract_links_caption_from_post( hash_post )
        return false unless hash_post.has_key?( "caption" )
        caption = hash_post["caption"]
        add_tofollow_tumblr_links_from_caption( caption, CurateTumblr.get_source_from_hash_post( hash_post ) )
        add_external_links_from_caption( caption )
        true
      end

      # --- utils ---

      # Looks the post up with get_hash_post and returns its reblog key.
      # A nil or empty +tumblr_url+ falls back to @tumblr_name.
      def get_reblog_key_from_post( tumblr_url, post_id )
        tumblr_url = @tumblr_name if tumblr_url.nil? || tumblr_url.empty?
        CurateTumblr.get_reblog_key_from_hash_post( get_hash_post( tumblr_url, post_id ) )
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
module CurateTumblr
  module Tumblr
    # Caption, tags, title and publication state attached to the posts.
    #
    # The instance methods expect the including object to provide @tumblr_name;
    # the STATE_* constants are resolved from the enclosing namespaces (they
    # are not defined in this module).
    module Infos

      HASH_CONFIG_INFOS = "infos"
      HASH_CONFIG_INFOS_TAGS = "tags"
      HASH_CONFIG_INFOS_TITLE = "title"

      attr_accessor :infos_caption, :infos_tags, :infos_title
      attr_reader :state

      class << self
        # Returns the default "infos" section of a config hash.
        def get_infos_config_hash
          { HASH_CONFIG_INFOS => {} }
        end

        # Returns the yaml text of the "infos" section. +hash_config+ is
        # accepted but not read: only the section header is emitted.
        # NOTE(review): the literal is reproduced as it appears in the reviewed
        # source (newline, key, colon, newline); confirm no indentation was
        # intended inside it.
        def get_string_yaml_from_infos_config( hash_config )
          "\n#{Infos::HASH_CONFIG_INFOS}:\n"
        end

        # Placeholder validation of the "infos" section: nothing is checked yet.
        # Returns the section when present, nil otherwise.
        def check_infos_config_hash( hash_config )
          # nothing to check now
          hash_config[HASH_CONFIG_INFOS] if hash_config.has_key?( HASH_CONFIG_INFOS )
        end
      end

      # --- config ---

      # Resets caption, tags and title to empty strings, puts the state on
      # STATE_QUEUE, then applies +hash_config+ when it is not empty.
      def init_infos!( hash_config={} )
        @infos_caption = ''
        @infos_tags = ''
        @infos_title = ''
        @state = STATE_QUEUE
        config_infos( hash_config ) unless hash_config.empty?
      end

      # Reads tags and title from the "infos" section of +hash_config+ and
      # rebuilds the caption when a title is available. Keys that are absent or
      # whose value is nil (e.g. a blank yaml entry) leave the current value
      # untouched instead of breaking the later String operations.
      def config_infos( hash_config )
        hash_infos = hash_config[HASH_CONFIG_INFOS]
        return if hash_infos.nil?
        tags = hash_infos[HASH_CONFIG_INFOS_TAGS]
        title = hash_infos[HASH_CONFIG_INFOS_TITLE]
        @infos_tags = tags unless tags.nil?
        @infos_title = title unless title.nil?
        set_title_caption unless @infos_title.nil? || @infos_title.empty?
      end

      def check_infos_config
      end

      # --- options ---

      # Returns "<@tumblr_name>.tumblr.com".
      def get_tumblr_domain
        @tumblr_name + ".tumblr.com"
      end

      def post_in_published
        @state = STATE_PUBLISHED
      end

      def post_in_draft
        @state = STATE_DRAFT
      end

      def post_in_queue
        @state = STATE_QUEUE
      end

      def post_in_private
        @state = STATE_PRIVATE
      end

      def in_published?
        @state == STATE_PUBLISHED
      end

      # Builds @infos_caption: +before+, then a paragraph holding a link whose
      # text is +title+ wrapped in +style_begin+/+style_end+, then +after+.
      # An empty +title+ falls back to @infos_title; an empty +link+ falls back
      # to a url built from @tumblr_name.
      # Raises RuntimeError "no title" when no title is available.
      # Returns the new caption.
      def set_title_caption( title="", link="", style_begin="<i>", style_end="</i>", before="<p>&nbsp;</p>", after="")
        title = @infos_title if title.empty?
        raise "no title" if title.nil? || title.empty?
        # NOTE(review): this default carries a "www." prefix while
        # get_tumblr_domain does not - confirm which form is intended.
        link = "http://www.#{@tumblr_name}.tumblr.com" if link.empty?
        @infos_caption = "#{before}<p><a href='#{link}' target='_blank'>#{style_begin}#{title}#{style_end}</a></p>#{after}"
      end

      # Appends the tags of +ar_tags+ (joined with ", ") to @infos_tags.
      def add_infos_ar_tags( ar_tags )
        add_infos_tags( ar_tags.join(", ") )
      end

      # Appends +tags+ to @infos_tags, separated by ", " when tags are already
      # present. Nil or empty +tags+ are ignored so no dangling separator is
      # left behind. Returns the resulting tags string.
      def add_infos_tags( tags )
        return @infos_tags if tags.nil? || tags.empty?
        @infos_tags += ", " unless @infos_tags.empty?
        @infos_tags += tags
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
class Hash
  # Raises RuntimeError when the value stored under +key+ is missing (nil) or
  # empty. Values that do not respond to empty? (numbers, booleans, ...) are
  # considered present. Returns nil when the value is present.
  def check_key( key )
    value = self[key]
    blank = value.nil? || ( value.respond_to?( :empty? ) && value.empty? )
    raise "'#{key}' is empty in #{to_s}" if blank
  end
end
@@ -0,0 +1,4 @@
1
+ require File.expand_path File.join( File.dirname(__FILE__), 'utilities_client' )
2
+ require File.expand_path File.join( File.dirname(__FILE__), 'utilities_file' )
3
+ require File.expand_path File.join( File.dirname(__FILE__), 'utilities_format' )
4
+ require File.expand_path File.join( File.dirname(__FILE__), 'utilities_validate' )
@@ -0,0 +1,103 @@
1
# Helpers around the hashes returned by the tumblr client: field access,
# status checks, display and summaries.
#
# hash_post_valid?, hash_id_valid?, hash_multiple_posts_valid? and the
# CLIENT_STATUS_* constants are not defined in this file.
module CurateTumblr

  # Keys copied by get_summary_hash_post, in output order.
  SUMMARY_POST_KEYS = %w[blog_name id type post_url slug state reblog_key].freeze

  # Returns the 'reblog_key' of +hash_post+, or false when the post is not
  # valid or has no such key.
  def self.get_reblog_key_from_hash_post( hash_post )
    return false unless hash_post_valid?( hash_post )
    return false unless hash_post.has_key?( 'reblog_key' )
    hash_post['reblog_key']
  end

  # Returns the 'id' of +hash_id+, or false when hash_id_valid? rejects it.
  def self.get_id_from_direct_post( hash_id )
    return false unless hash_id_valid?( hash_id )
    hash_id['id']
  end

  # Splits a post url into { tumblr_url:, post_id: }; false when +post_url+ is
  # not a tumblr post url.
  def self.get_hash_url_from_post_url( post_url )
    extractor = Tumblr::ExtractLinks
    return false unless extractor.tumblr_post_url?( post_url )
    { tumblr_url: extractor.get_tumblr_url( post_url ), post_id: extractor.get_post_id_from_post_url( post_url ) }
  end

  # Splits a reblog url into { tumblr_url:, post_id: }; false when +reblog_url+
  # is not a tumblr reblog url.
  def self.get_hash_url_from_reblog_url( reblog_url )
    extractor = Tumblr::ExtractLinks
    return false unless extractor.tumblr_reblog_url?( reblog_url )
    { tumblr_url: extractor.get_tumblr_from_reblog_url( reblog_url ), post_id: extractor.get_post_id_from_reblog_url( reblog_url ) }
  end

  # Returns the "source_url" of +hash_post+ (false when unavailable).
  def self.get_source_from_hash_post( hash_post )
    get_url_from_hash_post( "source_url", hash_post )
  end

  # Returns the "link_url" of +hash_post+ (false when unavailable).
  def self.get_link_url_from_hash_post( hash_post )
    get_url_from_hash_post( "link_url", hash_post )
  end

  # Returns hash_post[url_key], or false when the post is not valid or has no
  # such key. The value itself is not validated.
  def self.get_url_from_hash_post( url_key, hash_post )
    return false unless hash_post_valid?( hash_post )
    return false unless hash_post.has_key?( url_key )
    #return false if !tumblr_url?( hash_post[url_key] )
    hash_post[url_key]
  end

  # True when +hash_status+ has no "status" key or when its status is
  # CLIENT_STATUS_OK.
  def self.hash_status_ok?( hash_status )
    hash_status?( hash_status, CLIENT_STATUS_OK )
  end

  # True only when +hash_status+ carries the CLIENT_STATUS_NOT_FOUND status.
  def self.hash_status_not_found?( hash_status )
    hash_error_status?( hash_status, CLIENT_STATUS_NOT_FOUND )
  end

  # True only when +hash_status+ carries the CLIENT_STATUS_RATE_LIMIT status.
  def self.hash_status_rate_limit?( hash_status )
    hash_error_status?( hash_status, CLIENT_STATUS_RATE_LIMIT )
  end

  # True only when +hash_status+ carries the CLIENT_STATUS_BAD_REQUEST status.
  def self.hash_status_bad_request?( hash_status )
    hash_error_status?( hash_status, CLIENT_STATUS_BAD_REQUEST )
  end

  # Lenient status check: true when +hash_status+ has no "status" key at all,
  # or when its "status" equals +status+. Kept unchanged for existing callers;
  # the error predicates above use the strict hash_error_status? instead,
  # because a hash without a status must not be reported as an error.
  def self.hash_status?( hash_status, status )
    return true unless hash_status.has_key?( "status" )
    hash_status["status"] == status
  end

  # Strict status check: true only when a "status" key is present and equal to
  # +status+.
  def self.hash_error_status?( hash_status, status )
    hash_status.has_key?( "status" ) && hash_status["status"] == status
  end
  private_class_method :hash_error_status?

  # Returns a random number of seconds drawn uniformly from min..max (both
  # bounds included; an Integer when both bounds are Integers).
  # Raises RuntimeError when a bound is missing or when min >= max.
  def self.get_random_sleep( min, max )
    raise "min is empty" unless min
    raise "max is empty" unless max
    raise "min (#{min}) is >= max (#{max})" if min >= max
    # Draw inside the range directly: clamping a 1..max draw up to min would
    # return exactly min far more often than any other value.
    rand( min..max )
  end

  # Prints a one-line title for +hash_post+ and, unless +is_short+, every
  # key/value pair of the post.
  # Raises RuntimeError when the post is not valid.
  def self.display_hash_post( hash_post, is_short=false )
    raise "hash_post #{hash_post} is not valid" unless hash_post_valid?( hash_post )
    title = "#{hash_post['type']} in #{hash_post['blog_name']} (#{hash_post['note_count']})"
    title = title + " (source #{hash_post['source_url']})" if hash_post.has_key?( 'source_url' )
    puts title
    return if is_short
    hash_post.each { |key, value| puts "> #{key} : #{value}" }
  end

  # Displays every post of hash_posts['posts'] with display_hash_post.
  # Raises RuntimeError when +hash_posts+ is not valid.
  def self.display_hash_multiple_posts( hash_posts, is_short=false )
    raise "hash_posts #{hash_posts} is not valid" unless hash_multiple_posts_valid?( hash_posts )
    hash_posts['posts'].each { |hash_post| display_hash_post( hash_post, is_short ) }
  end

  # Returns a new hash holding only the SUMMARY_POST_KEYS present in
  # +hash_post+. An invalid post is returned as is.
  def self.get_summary_hash_post( hash_post )
    return hash_post unless hash_post_valid?( hash_post )
    SUMMARY_POST_KEYS.each_with_object( {} ) do |key, summary|
      copy_hash_key( hash_post, summary, key )
    end
  end

  # Copies +key+ from +hash_source+ to +hash_target+ when the source has it.
  # Returns the copied value, or false when the key is absent.
  def self.copy_hash_key( hash_source, hash_target, key )
    return false unless hash_source.has_key?( key )
    hash_target[key] = hash_source[key]
  end
end
@@ -0,0 +1,50 @@
1
# File helpers: per-tumblr file paths, reading, checking, backup and merge.
#
# PATH_LINKS and format_tumblr_url! are not defined in this file.
module CurateTumblr
  # Path of the links file of +tumblrname+ under +path+.
  def self.get_filepath_links_by_tumblr( tumblrname, path=PATH_LINKS )
    get_filepath_by_tumblr( tumblrname, path, "_links" )
  end

  # Path of the errors file of +tumblrname+ under +path+.
  def self.get_filepath_errors_by_tumblr( tumblrname, path=PATH_LINKS )
    get_filepath_by_tumblr( tumblrname, path, "_errors" )
  end

  # Path of the to-follow file of +tumblrname+ under +path+.
  def self.get_filepath_tofollow_by_tumblr( tumblrname, path=PATH_LINKS )
    get_filepath_by_tumblr( tumblrname, path, "_tofollow" )
  end

  # Path of the external links file of +tumblrname+ under +path+.
  def self.get_filepath_externallinks_by_tumblr( tumblrname, path=PATH_LINKS )
    get_filepath_by_tumblr( tumblrname, path, "_externallinks" )
  end

  # Builds "<path><tumblrname>/<tumblrname><suffix>". Plain concatenation: no
  # separator is inserted between +path+ and +tumblrname+.
  def self.get_filepath_by_tumblr( tumblrname, path, suffix )
    path + tumblrname + "/" + tumblrname + suffix
  end
  private_class_method :get_filepath_by_tumblr

  # Returns the lines of +filename+ (line endings kept).
  # Raises RuntimeError when +filename+ is nil or empty.
  def self.get_ar_from_file( filename )
    raise "filename is empty" if filename.nil? || filename.empty?
    # File.readlines closes the file itself; an open without a block would not.
    ::File.readlines( filename )
  end

  # Returns true when +filename+ names an existing, regular, readable file.
  # Raises RuntimeError otherwise.
  def self.checkFile( filename )
    raise "Filename is empty" if filename.nil? || filename.empty?
    raise "File #{filename} doesn't exist" unless ::File.exist?( filename )
    raise "#{filename} is not a true file" unless ::File.file?( filename )
    raise "File #{filename} is not readable" unless ::File.readable?( filename )
    true
  end

  # Copies the lines of +filename+ into +backupname+ (default:
  # "<filename>.save"). Returns false when +filename+ does not exist, true
  # once the backup is written.
  def self.backup_file( filename, backupname="" )
    return false unless ::File.exist?( filename )
    backupname = filename + ".save" if backupname.empty?
    ::File.open( backupname, "w+" ) { |file| file.puts get_ar_from_file( filename ) }
    true
  end

  # Rewrites +filename+ with the union of +set_tofile+ and the lines already
  # in the file (each one passed through format_tumblr_url! first), one entry
  # per line. The file is created when missing; +set_tofile+ itself is not
  # modified. Returns true.
  def self.add_set_tofile_without_repeat( filename, set_tofile )
    set_toadd = set_tofile.dup
    ar_file = ::File.exist?( filename ) ? get_ar_from_file( filename ) : []
    ar_file.each { |line| format_tumblr_url!( line ) }
    set_toadd.merge( ar_file )
    ::File.open( filename, "w" ) { |file| file.puts( set_toadd.to_a ) }
    true
  end

  # Not implemented yet.
  # NOTE(review): unlike its siblings this one is declared without "self." -
  # confirm whether that is intended.
  def create_config_file( filename, hash_config )
  end
end