tweet_manager 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.byebug_history +60 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +96 -0
- data/Rakefile +6 -0
- data/a.xml +76 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/example/test.rb +52 -0
- data/lib/service/medium.rb +47 -0
- data/lib/service/youtube.rb +62 -0
- data/lib/tweet_manager/version.rb +3 -0
- data/lib/tweet_manager.rb +44 -0
- data/tweet_manager.gemspec +30 -0
- metadata +147 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 73b0388984581bd7ebeb77f4d7dc01223abe54d5
|
4
|
+
data.tar.gz: f9695338c299f1e6dd02de3d8579901df15430ed
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 785ab83f97bc5516b5958b6a8a8404f21b68c71ebefea125806279a5c525c117bbc68ca1a52d4b1b9946dce8e18c67a04b5cb8ab14cdec80cef82a520626ac9c
|
7
|
+
data.tar.gz: 529e5d1c6ab768a2cb2e6e604396046cf48bd9bc900b3ac5ca3f29e5cc41868a349b98e92281d1112bf3feebde0d4571fbffc59bb4240d2803996bd68034d71f
|
data/.byebug_history
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
q
|
2
|
+
c
|
3
|
+
n
|
4
|
+
q
|
5
|
+
response['items'].map { |item| item['contentDetails'].values.first['uploads'] }
|
6
|
+
response['items']
|
7
|
+
response = parse_response(@agent.get(CHANNEL_ENDPOINT, params))
|
8
|
+
@agent.get(CHANNEL_ENDPOINT, params)
|
9
|
+
q
|
10
|
+
playlists(channel_id)
|
11
|
+
c
|
12
|
+
url
|
13
|
+
q
|
14
|
+
url =~ /\A#{URI::regexp(['http', 'https'])}\z/
|
15
|
+
Mechanize.new.get url
|
16
|
+
url
|
17
|
+
URI.join('http', url).to_s
|
18
|
+
URI.join('http:', url).to_s
|
19
|
+
URI.join('http:/', url).to_s
|
20
|
+
URI.join('http://', url).to_s
|
21
|
+
url = URI.join('http', url).to_s
|
22
|
+
url = target['@'].nil? ? url_from_domain(target) : url_from_user(target)
|
23
|
+
url = URI.join('http', url).to_s
|
24
|
+
page = Mechanize.new.get url
|
25
|
+
url
|
26
|
+
q
|
27
|
+
items.first.save 'item.xml'
|
28
|
+
items.first
|
29
|
+
items.size
|
30
|
+
items = page.search('//item')
|
31
|
+
page.search '//item'
|
32
|
+
page.save 'a.xml'
|
33
|
+
page.xml.search ".//*[@id='feedTitleText']"
|
34
|
+
page.xml.class.search ".//*[@id='feedTitleText']"
|
35
|
+
page.xml.class.search .//*[@id='feedTitleText']
|
36
|
+
page.xml.class
|
37
|
+
page.xml
|
38
|
+
page.search ".//*[@id='feedTitleText']"
|
39
|
+
page
|
40
|
+
page.body.class
|
41
|
+
page.body
|
42
|
+
page.search '//strong'
|
43
|
+
page
|
44
|
+
page.search
|
45
|
+
page.node
|
46
|
+
page.first
|
47
|
+
page.at ENTRIES_PATH
|
48
|
+
page.at
|
49
|
+
page.Searchable
|
50
|
+
page.to_html
|
51
|
+
page.reader
|
52
|
+
page.xpath(".//p")
|
53
|
+
page.class
|
54
|
+
page
|
55
|
+
page.search(".//p")
|
56
|
+
page.search(".//span")
|
57
|
+
page.search(".//div")
|
58
|
+
page.search(".//*[@class='entry']")
|
59
|
+
page.search(ENTRIES_PATH)
|
60
|
+
page
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# Contributor Code of Conduct
|
2
|
+
|
3
|
+
As contributors and maintainers of this project, and in the interest of
|
4
|
+
fostering an open and welcoming community, we pledge to respect all people who
|
5
|
+
contribute through reporting issues, posting feature requests, updating
|
6
|
+
documentation, submitting pull requests or patches, and other activities.
|
7
|
+
|
8
|
+
We are committed to making participation in this project a harassment-free
|
9
|
+
experience for everyone, regardless of level of experience, gender, gender
|
10
|
+
identity and expression, sexual orientation, disability, personal appearance,
|
11
|
+
body size, race, ethnicity, age, religion, or nationality.
|
12
|
+
|
13
|
+
Examples of unacceptable behavior by participants include:
|
14
|
+
|
15
|
+
* The use of sexualized language or imagery
|
16
|
+
* Personal attacks
|
17
|
+
* Trolling or insulting/derogatory comments
|
18
|
+
* Public or private harassment
|
19
|
+
* Publishing other's private information, such as physical or electronic
|
20
|
+
addresses, without explicit permission
|
21
|
+
* Other unethical or unprofessional conduct
|
22
|
+
|
23
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
24
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
25
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
26
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
27
|
+
threatening, offensive, or harmful.
|
28
|
+
|
29
|
+
By adopting this Code of Conduct, project maintainers commit themselves to
|
30
|
+
fairly and consistently applying these principles to every aspect of managing
|
31
|
+
this project. Project maintainers who do not follow or enforce the Code of
|
32
|
+
Conduct may be permanently removed from the project team.
|
33
|
+
|
34
|
+
This code of conduct applies both within project spaces and in public spaces
|
35
|
+
when an individual is representing the project or its community.
|
36
|
+
|
37
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
38
|
+
reported by contacting a project maintainer at jota.segovia@gmail.com. All
|
39
|
+
complaints will be reviewed and investigated and will result in a response that
|
40
|
+
is deemed necessary and appropriate to the circumstances. Maintainers are
|
41
|
+
obligated to maintain confidentiality with regard to the reporter of an
|
42
|
+
incident.
|
43
|
+
|
44
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
45
|
+
version 1.3.0, available at
|
46
|
+
[http://contributor-covenant.org/version/1/3/0/][version]
|
47
|
+
|
48
|
+
[homepage]: http://contributor-covenant.org
|
49
|
+
[version]: http://contributor-covenant.org/version/1/3/0/
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 jotase
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
# TweetManager
|
2
|
+
|
3
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/tweet_manager`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
+
|
5
|
+
A gem for automating tweets from your content sources, such as YouTube and Medium.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'tweet_manager'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install tweet_manager
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
|
27
|
+
|
28
|
+
# define twitter credentials https://apps.twitter.com/
|
29
|
+
credentials = {
|
30
|
+
consumer_key: ENV['CONSUMER_KEY'],
|
31
|
+
consumer_secret: ENV['CONSUMER_SECRET'],
|
32
|
+
access_token: ENV['ACCESS_TOKEN'],
|
33
|
+
access_token_secret: ENV['ACCESS_TOKEN_SECRET']
|
34
|
+
}
|
35
|
+
|
36
|
+
# define youtube developer key https://console.developers.google.com/
|
37
|
+
youtube_api_key = ENV['YOUTUBE_KEY']
|
38
|
+
|
39
|
+
# instantiate the Tweet class
|
40
|
+
tweet = TweetManager::Tweet.new(credentials, youtube_api_key)
|
41
|
+
|
42
|
+
# Youtube content
|
43
|
+
|
44
|
+
# define the channel id
|
45
|
+
channel_id = 'UCVbaQ_GvmiOapfn2GOyBiLQ' # 5rabbits's channel
|
46
|
+
|
47
|
+
# Tweet last youtube video from a channel
|
48
|
+
tweet.action(service: 'youtube', type: 'last', target: channel_id)
|
49
|
+
|
50
|
+
# Tweet random youtube video from a channel
|
51
|
+
tweet.action(service: 'youtube', type: 'random', target: channel_id)
|
52
|
+
|
53
|
+
# if you want to add extra content to your tweet, such as mentions or any other text,
|
54
|
+
# add an 'extras' argument as an array
|
55
|
+
tweet.action(service: 'youtube', type: 'random', target: channel_id, extras: ['@5rabbitsHQ', 'Check this out!'])
|
56
|
+
|
57
|
+
# Medium content
|
58
|
+
# Define a medium user
|
59
|
+
medium_user = '@_jotase'
|
60
|
+
|
61
|
+
# or a custom domain for medium
|
62
|
+
blog = 'blog.5rabbits.com'
|
63
|
+
|
64
|
+
# Tweet last story from a user
|
65
|
+
tweet.action(service: 'medium', type: 'last', target: medium_user)
|
66
|
+
|
67
|
+
# Tweet last story from a custom domain
|
68
|
+
tweet.action(service: 'medium', type: 'last', target: blog)
|
69
|
+
|
70
|
+
# Tweet random story from a user
|
71
|
+
tweet.action(service: 'medium', type: 'random', target: medium_user)
|
72
|
+
|
73
|
+
# Tweet random story from a custom domain
|
74
|
+
tweet.action(service: 'medium', type: 'random', target: blog)
|
75
|
+
|
76
|
+
# if you want to add extra content to your tweet, such as mentions or any other text,
|
77
|
+
# as before, add an 'extras' argument as an array
|
78
|
+
tweet.action(service: 'medium', type: 'random', target: blog, extras: ['@5rabbitsHQ', 'Check this out!'])
|
79
|
+
|
80
|
+
```
|
81
|
+
|
82
|
+
## Development
|
83
|
+
|
84
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
85
|
+
|
86
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
87
|
+
|
88
|
+
## Contributing
|
89
|
+
|
90
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/5rabbits/tweet_manager. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
91
|
+
|
92
|
+
|
93
|
+
## License
|
94
|
+
|
95
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
96
|
+
|
data/Rakefile
ADDED
data/a.xml
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:cc="http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html">
|
2
|
+
<channel>
|
3
|
+
<title><![CDATA[Stories by Javier Segovia on Medium]]></title>
|
4
|
+
<description><![CDATA[Stories by Javier Segovia on Medium]]></description>
|
5
|
+
<link>https://medium.com/@_jotase?source=rss-64c23a1254a7------2</link>
|
6
|
+
<image>
|
7
|
+
<url>https://cdn-images-1.medium.com/fit/c/150/150/1*iQXLw-kaFX6FD6ROjQuISg.jpeg</url>
|
8
|
+
<title>Stories by Javier Segovia on Medium</title>
|
9
|
+
<link>https://medium.com/@_jotase?source=rss-64c23a1254a7------2</link>
|
10
|
+
</image>
|
11
|
+
<generator>Medium</generator>
|
12
|
+
<lastBuildDate>Fri, 28 Jul 2017 16:30:04 GMT</lastBuildDate>
|
13
|
+
<atom:link href="https://medium.com/feed/@_jotase" rel="self" type="application/rss+xml"/>
|
14
|
+
<webMaster><![CDATA[yourfriends@medium.com]]></webMaster>
|
15
|
+
<atom:link href="http://medium.superfeedr.com" rel="hub"/>
|
16
|
+
<item>
|
17
|
+
<title><![CDATA[I developed a bot for this, and I did a little post about it “Cazando bots en Twitter”…]]></title>
|
18
|
+
<link>https://medium.com/@_jotase/i-developed-a-bot-for-this-and-i-did-a-little-post-about-it-cazando-bots-en-twitter-1a5de61124ca?source=rss-64c23a1254a7------2</link>
|
19
|
+
<guid isPermaLink="false">https://medium.com/p/1a5de61124ca</guid>
|
20
|
+
<dc:creator><![CDATA[Javier Segovia]]></dc:creator>
|
21
|
+
<pubDate>Sun, 18 Jun 2017 22:22:11 GMT</pubDate>
|
22
|
+
<atom:updated>2017-06-18T22:22:11.221Z</atom:updated>
|
23
|
+
<content:encoded><![CDATA[<p>I developed a bot for this, and I did a little post about it “Cazando bots en Twitter” @Jota_Segovia https://medium.com/@_jotase/cazando-bots-en-twitter-f173e2315185</p><img src="https://medium.com/_/stat?event=post.clientViewed&referrerSource=full_rss&postId=1a5de61124ca" width="1" height="1">]]></content:encoded>
|
24
|
+
</item>
|
25
|
+
<item>
|
26
|
+
<title><![CDATA[Si te refieres a los del gobierno, sin herramientas pagas, si te refieres a mi, gracias.]]></title>
|
27
|
+
<link>https://medium.com/@_jotase/si-te-refieres-a-los-del-gobierno-sin-herramientas-pagas-si-te-refieres-a-mi-gracias-d4f8415abd60?source=rss-64c23a1254a7------2</link>
|
28
|
+
<guid isPermaLink="false">https://medium.com/p/d4f8415abd60</guid>
|
29
|
+
<dc:creator><![CDATA[Javier Segovia]]></dc:creator>
|
30
|
+
<pubDate>Sat, 10 Jun 2017 00:30:46 GMT</pubDate>
|
31
|
+
<atom:updated>2017-06-10T00:30:46.972Z</atom:updated>
|
32
|
+
<content:encoded><![CDATA[<p>Si te refieres a los del gobierno, sin herramientas pagas, si te refieres a mi, gracias.</p><img src="https://medium.com/_/stat?event=post.clientViewed&referrerSource=full_rss&postId=d4f8415abd60" width="1" height="1">]]></content:encoded>
|
33
|
+
</item>
|
34
|
+
<item>
|
35
|
+
<title><![CDATA[Cazando bots en Twitter]]></title>
|
36
|
+
<link>https://medium.com/@_jotase/cazando-bots-en-twitter-f173e2315185?source=rss-64c23a1254a7------2</link>
|
37
|
+
<guid isPermaLink="false">https://medium.com/p/f173e2315185</guid>
|
38
|
+
<category><![CDATA[python]]></category>
|
39
|
+
<category><![CDATA[bots]]></category>
|
40
|
+
<category><![CDATA[twitter]]></category>
|
41
|
+
<category><![CDATA[politics]]></category>
|
42
|
+
<category><![CDATA[venezuela]]></category>
|
43
|
+
<dc:creator><![CDATA[Javier Segovia]]></dc:creator>
|
44
|
+
<pubDate>Thu, 08 Jun 2017 19:03:19 GMT</pubDate>
|
45
|
+
<atom:updated>2017-06-08T19:03:19.698Z</atom:updated>
|
46
|
+
<content:encoded><![CDATA[<p>Imagínense vivir en un país en dictadura, donde el dia anterior había sido el <strong>Dia del Trabajador</strong> y el salario mínimo mensual no llega a 50 USD, paralelamente habían<a href="https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=2&cad=rja&uact=8&ved=0ahUKEwjuzN-5wdLTAhWBipAKHbLiCEAQqUMIKjAB&url=http%3A%2F%2Fwww.elnuevoherald.com%2Fnoticias%2Fmundo%2Famerica-latina%2Fvenezuela-es%2Farticle147825589.html&usg=AFQjCNGPwgH2m6eH_rCQyci0OcLHop8qZA&sig2=dNafQ8b9hCpHrLBzPl9T4w"> disturbios y manifestaciones</a> en todo el país (porque el país sufre<a href="https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=8&ved=0ahUKEwjW5vjGwdLTAhVEQ5AKHcahA60QFghKMAc&url=http%3A%2F%2Fwww.eltiempo.com%2Fnoticias%2Fcrisis-en-venezuela&usg=AFQjCNGffjGkccUKANNdLkaG_Ae8jw3MSA&sig2=VHisGCalKYeFI2FK1fOI9Q&cad=rja"> crisis económicas, sociales, de salud, etc.</a>) siendo reprimidas de forma violenta por el poder que juro proteger al ciudadano, un dia terrible, pues ese es mi país de origen, mi amada Venezuela.</p><p>Para mejorar la situación, el presidente convoca a una <a href="https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=6&cad=rja&uact=8&ved=0ahUKEwi8o4jvwdLTAhVEQ5AKHcahA60QFghCMAU&url=http%3A%2F%2Fwww.el-nacional.com%2Fnoticias%2Fgobierno%2Fmaduro-convoco-una-constituyente-comunal_179943&usg=AFQjCNHBvlVf-h3shtusQ2SDmAipnTsXfw&sig2=8GL5l9W82DffVG2yPzZyoA">“Constituyente Comunal”</a> el cual genera todo tipo de repudio nacional como internacional, aun así al siguiente dia en el <strong>twitter</strong> en medio de protestas y repudios la etiqueta pro-gobierno <strong>#AConstituyentePorLaPaz</strong> se mantiene posicionada, este fenómeno ocurre todos los días, al tener todos los medios censurados en el país, todo es color de rosa, pero muy contrario a la versión de las redes sociales, y al no poder censurarla, tratan de manipularla desvirtuando la opinión publica teniendo <strong>trending topics</strong> pro 
gobierno, para así tener una prueba de que el “pueblo” (a.k.a <strong>bots</strong>) están con ellos, este es un secreto a voces, muchas personas han demostrado con fotos, videos, etc como operan los bots, pero yo aproveche el fin de semana largo, y me dedique a demostrarlo como mejor lo se hacer, creando <strong>bots</strong> para detectar <strong>bots</strong> :D.</p><p>Cabe destacar que esto no es solamente para analizar los bots del gobierno, hay otros personajes que se dedican también a posicionar etiquetas de formas fraudulentas, sobre todo en las noches, así que parte de lo que mostrare no funcionara contra los <strong>bots</strong> <strong>“chavistas”</strong> porque usan estrategias distintas.</p><p>Así como este post lo hago en español, no lo haré tan técnico como mis posts anteriores, mostrare simplemente los resultados obtenidos al analizar para ser exacto <strong>15563 tweets</strong> con la etiqueta <strong>#AConstituyentePorLaPaz</strong></p><blockquote>Aquellos que son programadores, esto lo hice usando <a href="https://github.com/tweepy/tweepy">https://github.com/tweepy/tweepy,</a> y <a href="http://pandas.pydata.org/">http://pandas.pydata.org/</a>, luego subiré el código fuente</blockquote><p>Para detectar bots me base en varios criterios</p><ul><li>Tweets originales (no retweets) duplicados por distintas cuentas, es muy sospechoso que varias personas piensen igual (aunque eso es parte del modelo socialista, el pensamiento único :D)</li></ul><figure><img alt="" src="https://cdn-images-1.medium.com/max/1024/1*OBwULKspP4a4WHVEN4Zsfw.png" /></figure><ul><li>Tweets falsos, como les mencione anteriormente, con los otros personajes que posicionan etiquetas, tienen varias cuentas que publican varios tweets con números o palabras aleatorias que no tienen que ver con la etiqueta.</li></ul><figure><img alt="" src="https://cdn-images-1.medium.com/max/1024/1*yJ6Oi4pXE-oEZPK_bKp0UQ.png" /></figure><ul><li>Tweets masivos por el mismo usuario, hay varias cuentas 
que se dedican a twitter y a dar RT masivamente a los tweets con estas etiquetas</li></ul><figure><img alt="" src="https://cdn-images-1.medium.com/max/326/1*Ah8xA47gIFlwJzcuxMIHVw.png" /></figure><ul><li>Tweets provenientes de herramientas de automatización, algo que encontré muy resaltate, era como estos bots operaban mediante el uso de TweetDeck, Botize y IFTTT</li></ul><figure><img alt="" src="https://cdn-images-1.medium.com/max/1024/1*3bRzCCeW2_XzUrl7MH_qug.png" /></figure><figure><img alt="" src="https://cdn-images-1.medium.com/max/1024/1*KNOFHpZL1cQS5SRQdr_vSw.png" /></figure><ul><li>Y por ultimo, el mas descarado, RTs hecho masiva y simultáneamente, esta comparación la hice buscando todos los tweets hecho rt al mismo tiempo, es decir, con hora, minuto, segundos y mili-segundos!, sin dejar abierto la posibilidad de margen de error</li></ul><figure><img alt="" src="https://cdn-images-1.medium.com/max/1024/1*U-ye_kS9ZSgE752Y-SpGwA.png" /></figure><p>En resumen, si bien despreciamos todas las pruebas excepto la ultima de los retweets simultáneos, representa mas del 15% de los tweets, por la densidad de los mismos, posicionan de manera eficaz las etiquetas, el resto de los tweets y menciones si se generan de manera orgánica, ya que algunos las respaldan y otros simplemente la condenan, de ambas formas mantienen la posición de la etiqueta, pero solo necesitaba el primer empujón, gracias al pueblo (bots)!</p><img src="https://medium.com/_/stat?event=post.clientViewed&referrerSource=full_rss&postId=f173e2315185" width="1" height="1">]]></content:encoded>
|
47
|
+
</item>
|
48
|
+
<item>
|
49
|
+
<title><![CDATA[Breaking captchas from scratch (almost)]]></title>
|
50
|
+
<link>https://blog.5rabbits.com/breaking-captchas-from-scracth-almost-753895fade8a?source=rss-64c23a1254a7------2</link>
|
51
|
+
<guid isPermaLink="false">https://medium.com/p/753895fade8a</guid>
|
52
|
+
<category><![CDATA[computer-vision]]></category>
|
53
|
+
<category><![CDATA[algorithms]]></category>
|
54
|
+
<category><![CDATA[programming]]></category>
|
55
|
+
<category><![CDATA[artificial-intelligence]]></category>
|
56
|
+
<category><![CDATA[ruby]]></category>
|
57
|
+
<dc:creator><![CDATA[Javier Segovia]]></dc:creator>
|
58
|
+
<pubDate>Tue, 27 Dec 2016 19:31:47 GMT</pubDate>
|
59
|
+
<atom:updated>2016-12-29T23:15:18.823Z</atom:updated>
|
60
|
+
<content:encoded><![CDATA[<p>Breaking captchas using ImageMagick + Tesseract</p><p>For those who need to automate tasks or just extract data (web scraping) from a site, you may encounter with the old and annoying captchas (because they don’t have a public API we can request gently)</p><figure><img alt="" src="https://cdn-images-1.medium.com/max/926/1*xGJi-b6lKC2mIl_89Mfepg.jpeg" /></figure><p>This post is intended to show how to solve this type of captchas (no google recaptchas) using <a href="http://www.imagemagick.org/script/index.php">ImageMagick</a> and <a href="https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=3&cad=rja&uact=8&ved=0ahUKEwjt5ryek4jRAhVBhJAKHd6CBB8QFggwMAI&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FTesseract&usg=AFQjCNEJVJ7mNPUsqUf9pqzFCaR59Ou5PQ&sig2=lCU8npUH1ZSHtKEb9gtJBQ">Tesseract</a></p><p>If you’re not familiar with Tesseract, it’s an OCR (Optical Character Recognition) created by HP and that’s what we will use it to recognize characters in a image, OCR is not the ultimate solution, because the trainning it’s based on clean images, so to get the best results from tesseract, you have to optimize the images . To achieve that, you should do the following:</p><ol><li><strong>Clean</strong>: Captchas usually will present “noise”, to avoid that a simple OCR will break it and make humans solve it, these noise can be dots, stripes, distortions, etc. so we have to remove every non-alphanumeric content from the image</li></ol><figure><img alt="" src="https://cdn-images-1.medium.com/max/500/1*aq2G2quV86nH1yMYUSgs9Q.png" /></figure><p>2. <strong>Binary image: </strong>To help OCR, the image should be optimized, this mean that, pixels have to be black or white, non gray areas (or color area duh), it will be useful to understand characters if only “true positive” pixels exist so features and patterns can be detected.</p><p>3. 
<strong>Remove blank spaces:</strong> Avoiding processing blank spaces, will improve performance (less pixels to read) and result.</p><p>4. <strong>Configuration: </strong>Tesseract has tons of configuration, but for this task, just few will be useful. <a href="https://github.com/gali8/Tesseract-OCR-iOS/wiki/Advanced-Tesseract-Configuration">https://github.com/gali8/Tesseract-OCR-iOS/wiki/Advanced-Tesseract-Configuration</a></p><p>Now let’s write an example</p><p>This project will be written in ruby, so we’ll need <a href="https://github.com/rmagick/rmagick"><strong>rmagick</strong></a> and <a href="https://github.com/dannnylo/rtesseract"><strong>rtesseract</strong></a><strong> </strong>gems to interact with ImageMagick and Tesseract.</p><p>First, let’s write a class that generate captchas</p><iframe src="" width="0" height="0" frameborder="0" scrolling="no"><a href="https://medium.com/media/ac672311a4995e9ed0b8d2164542744e/href">https://medium.com/media/ac672311a4995e9ed0b8d2164542744e/href</a></iframe><blockquote>This code it’s extracted from <a href="https://github.com/kyledayton/rcaptcha"><strong>rcaptcha,</strong></a><strong> </strong>I edited to add custom image resolution</blockquote><p>To get a captcha, we should write</p><pre>require_relative 'captcha'<br>captcha_path = 'captcha.jpg'</pre><pre>text = 'foobar'<br>width = 400<br>height = 200<br>text_size = 80<br>captcha = Captcha.generate text, width, height, text_size</pre><pre>File.open(captcha_path, 'wb') { |f| f.write(captcha) }</pre><p>This will give us an image like this</p><figure><img alt="" src="https://cdn-images-1.medium.com/max/400/1*BjBojWWMJ92PC5S_RmIsLA.jpeg" /></figure><p>FYI, if you send this image to an OCR, you probably get a perfect result, it’s easy to read, but if you combine uppercase and lowercase letter + numbers, you probably not. 
Anyway, this post it’s to show a way to solve it.</p><p>For testing, we’ll create arandom text for captchas using <a href="https://github.com/stympy/faker"><strong>faker</strong></a></p><pre>...<br>require 'faker'</pre><pre>text = Faker::Lorem.characters(6)<br>captcha = Captcha.generate text, width, height, text_size</pre><pre>File.open(captcha_path, 'wb') { |f| f.write(captcha) }</pre><figure><img alt="" src="https://cdn-images-1.medium.com/max/400/1*u9DOz6E9FENADdARd1yWrg.jpeg" /></figure><p>Now let’s improve the image to extract characters with OCR</p><p>We should understand the captcha, we know that it’s always a center text, with 6 characters with lowercase letters and numbers, characters are blue, and there’re a lot of dots (noise) with different colors, even blue as the characters. So, let first crop the image with the known location</p><pre># Read image<br>img = Magick::Image.read(captcha_path).first<br># args X, Y, width, height<br>img.crop! 50, 60, 300, 80</pre><pre>img.write 'captcha_solved.jpg'</pre><figure><img alt="" src="https://cdn-images-1.medium.com/max/300/1*FKcgmCtQ4UqPERLajU3oVw.jpeg" /></figure><p>then, reduce image size to reduce computation, be careful doing this, because smaller the image, less information you will have, in this case, pixels to read.</p><pre>img.scale! 
0.75</pre><p>now, transform image into gray colour scale</p><pre># transform image into gray scale <br>img = img.quantize(128, Magick::GRAYColorspace)</pre><figure><img alt="" src="https://cdn-images-1.medium.com/max/225/1*KuMU9ajz7v4ODMEGf-9K2Q.jpeg" /></figure><p>we do this to help us clean noise, now we can convert those pixels into white pixels below a threshold we define, in this case I selected 180 pixels (this pixels is based on 256 * 256 colors)</p><pre># convert into white everything below the <br>img = img.white_threshold(180 * 256)</pre><figure><img alt="" src="https://cdn-images-1.medium.com/max/225/1*Dv8b47KG_EnHkVwQ6D8-CA.jpeg" /></figure><p>Those dots remaining are equal or similar with the character’s colors, but believe it or not, it help us a LOT!, now let’s convert this pixels colors into “binary colors” just 0 or 255.</p><pre># transform image into binary colors<br>img = img.quantize(2, Magick::GRAYColorspace)</pre><figure><img alt="" src="https://cdn-images-1.medium.com/max/225/1*5LexGvxZPbCHj4h4KgyGfA.jpeg" /></figure><p>We still got this annoying dots remaining, if you see close, there are white dots or blank spaces inside our characters, because they’re relative small, they can be harmless, but what if they’re bigger? it could be a huge problem because OCR will maybe recognize it as a different character, just imagine a “8” with one of it’s curves erased by an blank spot turn it into a “3” and vice-versa with black dots, or just becoming characters into nothing for the OCR, this is a issue we have to handle. 
To do that, we can “average” neighbors pixels, converting each pixels into the average value inside a ratio.</p><iframe src="" width="0" height="0" frameborder="0" scrolling="no"><a href="https://medium.com/media/7f0dcbf19064d6f32231a8a946fb32a2/href">https://medium.com/media/7f0dcbf19064d6f32231a8a946fb32a2/href</a></iframe><p>I like to add some blank border to image to clean everything possible in a ratio</p><pre># Add border to avoid noise there<br>img.border!(5, 5, 'white')</pre><p>Because there’re less black dot noises than white spaces, I’ll start cleaning those using a ratio of 2 pixels because, there’re just 1 pixel dots.</p><pre>process img, 'white', 2</pre><figure><img alt="" src="https://cdn-images-1.medium.com/max/235/1*JzXUsJqs8cv7BF7fxJM2CA.jpeg" /></figure><p>And then, fill those blank spots inside the characters with bigger a ratio</p><pre>process img, 'black', 3</pre><figure><img alt="" src="https://cdn-images-1.medium.com/max/235/1*RcFbjjKRcvo-Du6v2MfffQ.jpeg" /></figure><p>Now let’s try to make the edges soft by using gaussian blur</p><pre># soft edges<br>img = img.gaussian_blur 0.5, 0.5</pre><figure><img alt="" src="https://cdn-images-1.medium.com/max/235/1*oFuzDYhN3O6KKZCTX5D4ZA.jpeg" /></figure><p>And lastly, we trim the image to remove blank spaces</p><pre>img.fuzz = 1<br>img.trim!</pre><figure><img alt="" src="https://cdn-images-1.medium.com/max/182/1*0eMR4QwrkukG6KsWiOG0Wg.jpeg" /></figure><p>Now we got our image ready to read, remember the tesseract configuration I told you before? 
well, there’re also some parameters you use when you run tesseract</p><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/Command-Line-Usage">https://github.com/tesseract-ocr/tesseract/wiki/Command-Line-Usage</a></p><p>but we need 3 things:</p><ol><li><strong>PSM: </strong>Because this kind of captcha is a single “word”, we set “7” as value to trait the image as a single line.</li><li><strong>LANG:</strong> Tesseract has default trainning sets for different language, and because we we’re not reading special characters we set english “eng” as language.</li><li><strong>OPTIONS:</strong> We can set a lot of parameters to OCR, but let’s keep it simple, just set a whilelist for the characters we can expect from the image, in this case we need “abcdefghijklmnopqrstuvwxyz1234567890”.</li></ol><p>For options, we need to create a file with the parameters, and save it with the name you want in the tesseract config directory,</p><pre># /usr/share/tesseract-ocr/tessdata/config/captcha<br>tessedit_char_whitelist abcdefghijklmnopqrstuvwxyz1234567890</pre><p>And finally, lets try to solve the captcha through the OCR</p><pre>require 'rtesseract'<br>solved_path = 'captcha_solved.jpg'<br>img.write solved_path</pre><pre>text = RTesseract.new(solved_path,<br> lang: :eng,<br> options: :captcha,<br> psm: 7)<br>text.to_s_without_spaces # => "16qe9o"</pre><p>If we pass the original image to the OCR, returns a empty string because it couldn’t understand the text.</p><p>I created a repo with this example, and with a script to test our solution, and it has a 80% accuracy that it’s really useful, because if we cant solve any particular captcha, we can refresh and try again.</p><p><a href="https://github.com/JotaSe/solving-captcha">JotaSe/solving-captcha</a></p><h3>Conclusion</h3><p>There’re more ways to solve captcha, a smarter way it’s using Machine Learning, you can create a dataset of multiple types of captchas with different noises and use it to train and get better accuracy, but 
if you can’t do that, I hope this post can be useful for you.</p><img src="https://medium.com/_/stat?event=post.clientViewed&referrerSource=full_rss&postId=753895fade8a" width="1" height="1"><hr><p><a href="https://blog.5rabbits.com/breaking-captchas-from-scracth-almost-753895fade8a">Breaking captchas from scratch (almost)</a> was originally published in <a href="https://blog.5rabbits.com">5rabbits Engineering</a> on Medium, where people are continuing the conversation by highlighting and responding to this story.</p>]]></content:encoded>
|
61
|
+
</item>
|
62
|
+
<item>
|
63
|
+
<title><![CDATA[Deciding like a robot. NOT! (Boolean Expression in Ruby)]]></title>
|
64
|
+
<link>https://blog.5rabbits.com/deciding-like-a-robot-not-boolean-expression-in-ruby-96fc20e404c0?source=rss-64c23a1254a7------2</link>
|
65
|
+
<guid isPermaLink="false">https://medium.com/p/96fc20e404c0</guid>
|
66
|
+
<category><![CDATA[algorithms]]></category>
|
67
|
+
<category><![CDATA[ruby]]></category>
|
68
|
+
<category><![CDATA[metaprogramming]]></category>
|
69
|
+
<category><![CDATA[programming]]></category>
|
70
|
+
<dc:creator><![CDATA[Javier Segovia]]></dc:creator>
|
71
|
+
<pubDate>Thu, 29 Sep 2016 17:16:53 GMT</pubDate>
|
72
|
+
<atom:updated>2016-09-29T17:16:53.318Z</atom:updated>
|
73
|
+
<content:encoded><![CDATA[<p>Sometimes we should create annoying “rules” to make decisions when we are coding things like this</p><pre>if foo & bar & a.eql?('foo') || foo_bar == bar || ... # and so on</pre><p>So to keep it “legible” we should do some improvements, like creating methods to re-use some statements or simply nesting statements to avoid long lines of code. But, if we think as a “machine”, we have to interpret “true” and “false” values (binary values 1,0)</p><p>Let’s see how a circuit works</p><figure><img alt="" src="https://cdn-images-1.medium.com/max/945/1*7HOC9-rfp81zE_Ac0Iv3tg.png" /><figcaption><a href="http://www.upcrost.site/boolean-expression-to-logic-circuit/logic-gates-converting-nand-to-nor-boolean-expression-boolean-circuit-software-y0tdr/">http://www.upcrost.site/boolean-expression-to-logic-circuit/logic-gates-converting-nand-to-nor-boolean-expression-boolean-circuit-software-y0tdr/</a></figcaption></figure><p>We got 3 switches <strong>S</strong>, <strong>X</strong> and <strong>Y</strong>, and there’s a circuit to turn on a bulb, as a programmer we can see the <a href="https://en.wikipedia.org/wiki/Logic_gate"><strong>logics gates</strong></a><strong> </strong>as “If Statements” because, that’s what they are, a physical implementation of boolean functions, so our function is <strong>(S’X + SY)</strong></p><pre>If S & !X || S & Y<br> # Do stuff<br>end</pre><p>Not a big deal right?, but what if we have a lot of inputs and a lot of logics gates?</p><figure><img alt="" src="https://cdn-images-1.medium.com/max/400/1*15ZCaOYwvKjKt93_H1owww.gif" /></figure><p>Or even bigger?</p><p>So, let’s try to code that</p><iframe src="" width="0" height="0" frameborder="0" scrolling="no"><a href="https://medium.com/media/6068329e227d8fa060c4377f3453bd9f/href">https://medium.com/media/6068329e227d8fa060c4377f3453bd9f/href</a></iframe><p>What about if we have the same input “<strong>switches</strong>” but different circuits for different outputs?, then we’ll 
have to code other <strong>circuit methods</strong> or <strong>if statements</strong></p><pre>def foo_circuit(s, x, y)<br> # logic stuff<br>end</pre><pre>def bar_circuit(s, x, y)<br> <br>end</pre><pre># ... Imagine if it's more complex...</pre><p>This can work for a lot of purposes, personally, I was thinking about how to create a To Do list as a decision tree, where every node can contain a decision tree as well, it would be a pain in the ass if we wanted to develop/maintain it</p><p>But, why don’t we use <a href="https://www.toptal.com/ruby/ruby-metaprogramming-cooler-than-it-sounds"><strong>meta-programming</strong></a> to do this task? To avoid similar methods or block codes within the same types of input / output. That’s why I coded a little gem thinking about it, let’s take a look</p><p><a href="https://rubygems.org/gems/undecided">undecided | RubyGems.org | your community gem host</a></p><iframe src="" width="0" height="0" frameborder="0" scrolling="no"><a href="https://medium.com/media/95c992c7d9a2218bc2825614060133a2/href">https://medium.com/media/95c992c7d9a2218bc2825614060133a2/href</a></iframe><p>Easy right? 
OK maybe you’re thinking, it’s easier to do</p><pre>return (A & B)</pre><p>Let’s create a TO DO class, that have an array of tasks and a name to identify them:</p><pre>class ToDo<br> # A to do list should have an array of tasks<br> attr_reader :tasks, :name</pre><pre># let’s define our tasks<br> def initialize(name, tasks)<br> <a href="http://twitter.com/name">@name</a> = name<br> <a href="http://twitter.com/tasks">@tasks</a> = task<br> end</pre><pre># o we can simply add more task to our array <br> def add_task(task)<br> <a href="http://twitter.com/tasks">@tasks</a> << task<br> end</pre><pre># Retrieve our task list as a hash<br> def task_list<br> <a href="http://twitter.com/tasks">@tasks</a>.map { |task| [task.name, task.completed] }.to_h<br> end<br>end</pre><p>Now create a Task class that has two attributes, name and completed, which will behave like a hash with a Key (name) and a Value (completed)</p><pre>class Task<br> attr_accessor :name, :completed</pre><pre>def initialize(name, completed)<br> <a href="http://twitter.com/name">@name</a> = name<br> <a href="http://twitter.com/completed">@completed</a> = completed<br> end<br>end</pre><p>To evaluate rules and decisions, let’s create a class “Rule” that has a boolean expression and a <strong>to do</strong> model that will return if the evaluation is positive</p><pre>class Rule<br> attr_reader :expression, :to_do</pre><pre>def initialize(expression, to_do)<br> <a href="http://twitter.com/expression">@expression</a> = expression<br> <a href="http://twitter.com/to_do">@to_do</a> = to_do<br> end<br>end</pre><p>And finally, lets create a class called “Flow”, that will have the business logic to decide in which to do list we’re working</p><pre>class Flow<br> attr_accessor :current_todo<br> def initialize(rules)<br> <a href="http://twitter.com/rules">@rules</a> = rules<br> # initialize a decider class<br> <a href="http://twitter.com/decider">@decider</a> = Undecided::Decider.new<br> end</pre><pre> def start(to_do)<br> 
<a href="http://twitter.com/current_todo">@current_todo</a> = to_do.nil? ? next_to_do : to_do<br> end</pre><pre># Iterate every rule to match the true one<br> def next_to_do<br> <a href="http://twitter.com/rules">@rules</a>.each do |rule| <br> next if !next?(rule)<br> <a href="http://twitter.com/current_todo">@current_todo</a> = rule.to_do<br> break <br> end<br> # return the same if none of the rules are true<br> end</pre><pre># check if the rule is met<br> def next?(rule)<br> <a href="http://twitter.com/decider">@decider</a>.decide(rule.expression, <a href="http://twitter.com/current_todo">@current_todo</a>.task_list, false)<br> end <br>end</pre><p>To test it, let’s instance some classes</p><pre># let's define our clases</pre><pre># tasks<br>task_a = Task.new(:a, false)<br>task_b = Task.new(:b, true)<br>task_c = Task.new(:c, false)<br>task_d = Task.new(:d, true)<br>task_e = Task.new(:e, false)</pre><pre># todos<br>todo_a = ToDo.new('todo_a', [task_a, task_d])<br>todo_b = ToDo.new('todo_b', [task_a, task_b, task_e])<br>todo_c = ToDo.new('todo_c', [task_c, task_d])<br>todo_d = ToDo.new('todo_d', [task_e, task_b])</pre><pre># rules<br>rules = [<br> Rule.new('!a&!b', todo_b),<br> Rule.new('a&b&c!d|(d|c)', todo_c),<br> Rule.new('!a&!c', todo_d),<br> Rule.new('b&e', todo_b)<br>]</pre><pre># flow<br>flow = Flow.new(rules)</pre><p>Let’s start with “<strong>todo_a”</strong></p><pre>flow.start todo_a<br># #<ToDo:0x00000001c61118 <a href="http://twitter.com/tasks">@tasks</a>=[#<Task:0x00000001a2d9c8 <a href="http://twitter.com/name">@name</a>=:a, <a href="http://twitter.com/completed">@completed</a>=false>, #<Task:0x000000023d63f8 <a href="http://twitter.com/name">@name</a>=:d, <a href="http://twitter.com/completed">@completed</a>=true>], <a href="http://twitter.com/name">@name</a>="todo_a"></pre><p>Now let’s move to another to do list check that the first rule should be positive cuz “<strong>todo_a”</strong> doesn’t have a <strong>‘d’</strong> task, so it will be false, and 
the <strong>‘a’</strong> task is false</p><pre>flow.next_to_do<br>##<ToDo:0x0000000215b7e0 <a href="http://twitter.com/tasks">@tasks</a>=[#<Task:0x00000002265550 <a href="http://twitter.com/name">@name</a>=:a, <a href="http://twitter.com/completed">@completed</a>=false>, #<Task:0x00000002247b68 <a href="http://twitter.com/name">@name</a>=:b, <a href="http://twitter.com/completed">@completed</a>=true>, #<Task:0x000000021c4998 <a href="http://twitter.com/name">@name</a>=:e, <a href="http://twitter.com/completed">@completed</a>=false>], <a href="http://twitter.com/name">@name</a>="todo_b"></pre><p>If we check the new todo list</p><pre>flow.current_todo.name<br># ‘todo_b’</pre><p>So it works! the rule we evaluated was <strong>Rule.new(‘!a&!b’, todo_b), </strong>then if <strong>!a&!b </strong>with values <strong>a = false, d = false, </strong>and according to</p><pre># decide function has an default argument 'stric' = true, to match the total values in the expression<br># if we want to avoid this, we simply pass it as false</pre><pre>expression = 'A&B&C'<br>values = { A:1, B:1 }<br>decider.decide(expression, values)<br># Return false cuz A&B&C means that (A==true & B==true & C==true), but C is nil, then is false</pre><p>The evaluation is positive! 
and it will return the new to do list <strong>“todo_b”</strong></p><p>You can keep playing with the remaining values, but I wrote it randomly, so maybe not all of them matches the remaining todo list.</p><p>Here’s the whole code:</p><iframe src="" width="0" height="0" frameborder="0" scrolling="no"><a href="https://medium.com/media/867f7569f66e8e099932381bba5852ea/href">https://medium.com/media/867f7569f66e8e099932381bba5852ea/href</a></iframe><p>To evaluate these <strong>functions, </strong>I’m using <a href="https://github.com/JotaSe/undecided"><strong>eval</strong></a><strong> </strong>to perform the functions as a code block, but before you freak out, there’s a lot of validation to avoid malicious code injection.</p><p>Feel free to use, fork, rewrite, fix, or whatever you want the <a href="https://github.com/JotaSe/undecided"><strong>gem</strong></a>, I hope it can be useful for someone.</p><p><a href="https://github.com/JotaSe/undecided">JotaSe/undecided</a></p><img src="https://medium.com/_/stat?event=post.clientViewed&referrerSource=full_rss&postId=96fc20e404c0" width="1" height="1"><hr><p><a href="https://blog.5rabbits.com/deciding-like-a-robot-not-boolean-expression-in-ruby-96fc20e404c0">Deciding like a robot. NOT! (Boolean Expression in Ruby)</a> was originally published in <a href="https://blog.5rabbits.com">5rabbits Engineering</a> on Medium, where people are continuing the conversation by highlighting and responding to this story.</p>]]></content:encoded>
|
74
|
+
</item>
|
75
|
+
</channel>
|
76
|
+
</rss>
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "tweet_manager"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/example/test.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
|
2
|
+
# Example script: tweets content from YouTube and Medium through TweetManager.
# Every call to `tweet.action` below posts a real tweet with the configured
# Twitter account.

# Load the gem so that the TweetManager namespace is defined when this script
# is run on its own (e.g. `ruby -Ilib example/test.rb`).
require 'tweet_manager'

# Define the Twitter credentials (create them at https://apps.twitter.com/)
credentials = {
  consumer_key: ENV['CONSUMER_KEY'],
  consumer_secret: ENV['CONSUMER_SECRET'],
  access_token: ENV['ACCESS_TOKEN'],
  access_token_secret: ENV['ACCESS_TOKEN_SECRET']
}

# Define the YouTube developer key (https://console.developers.google.com/)
youtube_api_key = ENV['YOUTUBE_KEY']

# Instantiate a Tweet object
tweet = TweetManager::Tweet.new(credentials, youtube_api_key)

# YouTube content

# Define the channel id
channel_id = 'UCVbaQ_GvmiOapfn2GOyBiLQ' # 5rabbits's channel

# Tweet the last YouTube video from a channel
tweet.action(service: 'youtube', type: 'last', target: channel_id)

# Tweet a random YouTube video from a channel
tweet.action(service: 'youtube', type: 'random', target: channel_id)

# To append extra content to the tweet (mentions or any other text),
# pass an `extras` argument as an array
tweet.action(service: 'youtube', type: 'random', target: channel_id, extras: ['@5rabbitsHQ', 'Check this out!'])

# Medium content

# Define a Medium user
medium_user = '@_jotase'

# ... or a custom domain hosted on Medium
blog = 'blog.5rabbits.com'

# Tweet the last story from a user
tweet.action(service: 'medium', type: 'last', target: medium_user)

# Tweet the last story from a custom domain
tweet.action(service: 'medium', type: 'last', target: blog)

# Tweet a random story from a user
tweet.action(service: 'medium', type: 'random', target: medium_user)

# Tweet a random story from a custom domain
tweet.action(service: 'medium', type: 'random', target: blog)

# As before, pass an `extras` array to append extra content to the tweet
tweet.action(service: 'medium', type: 'random', target: blog, extras: ['@5rabbitsHQ', 'Check this out!'])
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
# Retrieve user's content from Medium
|
3
|
+
# Reads the RSS feed of a Medium user or of a Medium-backed custom domain and
# exposes its stories as plain hashes.
class Medium
  def initialize
    @agent = Mechanize.new
  end

  # Fetches and parses the feed for +target+.
  #
  # @param target [String] a Medium handle containing "@" (e.g. "@_jotase"),
  #   or a custom domain with or without scheme (e.g. "blog.5rabbits.com")
  # @return [Array<Hash>] one `{ title:, url: }` hash per story, in feed order
  def stories(target)
    # Reuse the agent built in #initialize instead of creating a new one per call.
    parse_stories(@agent.get(feed_url(target)))
  end

  # @param target [String] see #stories
  # @return [Hash, nil] the first story of the feed, or nil when there is none
  def last_content(target)
    stories(target).first
  end

  # @param target [String] see #stories
  # @return [Hash, nil] a randomly picked story, or nil when there is none
  def random_content(target)
    stories(target).sample
  end

  private

  # Targets containing "@" are treated as user handles, anything else as a domain.
  def feed_url(target)
    target.include?('@') ? url_from_user(target) : url_from_domain(target)
  end

  # Feed URL of a Medium user; +username+ is expected to include the "@".
  def url_from_user(username)
    "https://medium.com/feed/#{username}"
  end

  # Feed URL of a custom domain. A missing scheme defaults to https and a
  # trailing slash is dropped so the result never contains "//feed".
  def url_from_domain(domain)
    domain = "https://#{domain}" unless domain =~ %r{\Ahttps?://}i
    "#{domain.chomp('/')}/feed"
  end

  # Converts every <item> of the feed into a story hash. Items without any
  # <category> element are skipped (presumably responses/comments rather than
  # stories -- TODO confirm against a live feed).
  def parse_stories(page)
    page.search('//item')
        .reject { |entry| entry.search('.//category').empty? }
        .map { |entry| parse_entry(entry) }
  end

  # Extracts the title and link of a single feed item.
  def parse_entry(entry)
    {
      title: entry.search('.//title').text,
      url: entry.search('.//link').text
    }
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'json'
require 'mechanize'
require 'yt'
|
2
|
+
# Retrieve channel's video from YouTube
|
3
|
+
# Lists the videos of a YouTube channel through the YouTube Data API v3 and
# exposes them as plain hashes.
class Youtube
  CHANNEL_ENDPOINT = 'https://www.googleapis.com/youtube/v3/channels'.freeze
  PLAYLIST_ENDPOINT = 'https://www.googleapis.com/youtube/v3/playlistItems'.freeze
  YOUTUBE_URL = 'https://www.youtube.com/watch'.freeze

  # @param api_key [String] YouTube Data API key sent with every request
  def initialize(api_key)
    @api_key = api_key
    @agent = Mechanize.new
  end

  # @param channel_id [String] id of the channel to inspect
  # @return [Array<Hash>] `{ title:, url: }` hashes for the videos found in
  #   the channel's uploads playlist(s)
  def videos(channel_id)
    playlists(channel_id).flat_map { |playlist| video_from_playlist(playlist) }
  end

  # Fetches one page of items from a playlist. Only a single request is made
  # with maxResults 50, so at most 50 videos are returned per playlist.
  #
  # @param playlist_id [String]
  # @return [Array<Hash>] `{ title:, url: }` hashes
  def video_from_playlist(playlist_id)
    params = {
      playlistId: playlist_id,
      key: @api_key,
      part: 'snippet',
      maxResults: 50
    }
    parse_videos(parse_response(@agent.get(PLAYLIST_ENDPOINT, params)))
  end

  # @param channel_id [String]
  # @return [Hash, nil] the first video returned by the API, or nil if none
  def last_content(channel_id)
    videos(channel_id).first
  end

  # @param channel_id [String]
  # @return [Hash, nil] a randomly picked video, or nil if none
  def random_content(channel_id)
    videos(channel_id).sample
  end

  private

  # Decodes the JSON body of an API response.
  def parse_response(response)
    JSON.parse(response.body)
  end

  # Returns the ids of the uploads playlists of the matching channel(s).
  # The id is read from its explicit location in the payload
  # (contentDetails.relatedPlaylists.uploads) rather than from "the first
  # value" of contentDetails, and channels without one are ignored.
  def playlists(channel_id)
    params = {
      key: @api_key,
      part: 'contentDetails',
      id: channel_id
    }
    response = parse_response(@agent.get(CHANNEL_ENDPOINT, params))
    Array(response['items'])
      .map { |item| item.dig('contentDetails', 'relatedPlaylists', 'uploads') }
      .compact
  end

  # Converts a playlistItems payload into video hashes; a payload without
  # "items" yields an empty list.
  def parse_videos(result)
    Array(result['items']).map { |item| parse_video(item['snippet']) }
  end

  # Builds the public hash for one playlist item snippet. The video id is
  # passed as the `v` query parameter, which is the canonical watch URL form.
  def parse_video(video)
    {
      title: video['title'],
      url: "#{YOUTUBE_URL}?v=#{video['resourceId']['videoId']}"
    }
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'tweet_manager/version'
|
2
|
+
require 'service/medium'
|
3
|
+
require 'service/youtube'
|
4
|
+
require 'twitter'
|
5
|
+
|
6
|
+
module TweetManager
  # Picks a piece of content from one of the supported services and posts it
  # as a tweet through the configured Twitter account.
  class Tweet
    AVAILABLE_SERVICES = %w[medium youtube].freeze
    # Original (misspelled) constant name, kept so existing references still work.
    AVALAIBLE_SERVICES = AVAILABLE_SERVICES
    TYPES = %w[last random].freeze

    # @param credentials [Hash] Twitter credentials with the keys
    #   :consumer_key, :consumer_secret, :access_token and :access_token_secret
    # @param youtube_api_key [String] key handed to the Youtube service
    def initialize(credentials, youtube_api_key)
      @client = Twitter::REST::Client.new do |config|
        config.consumer_key = credentials[:consumer_key]
        config.consumer_secret = credentials[:consumer_secret]
        config.access_token = credentials[:access_token]
        config.access_token_secret = credentials[:access_token_secret]
      end
      @youtube_api_key = youtube_api_key
    end

    # Fetches content from +service+ and tweets "<title> <url> <extras...>".
    #
    # @param service [String, Symbol] one of AVAILABLE_SERVICES
    # @param type [String, Symbol] one of TYPES
    # @param target [String] passed to the service's `<type>_content` method
    #   (channel id for youtube; user handle or domain for medium)
    # @param extras [Array<String>] extra words appended to the tweet
    # @raise [RuntimeError] if the service or type is unknown, or if the
    #   service returns no content for the target
    # @return whatever the Twitter client's `update` returns
    def action(service:, type:, target:, extras: [])
      service = service.to_s
      type = type.to_s
      raise 'Non existing service' unless AVAILABLE_SERVICES.include?(service)
      raise 'Non existing type' unless TYPES.include?(type)

      # `send` is needed because the service accessors are private; the name
      # has been checked against the whitelist above.
      result = send(service).public_send("#{type}_content", target)
      raise 'No content found' if result.nil?

      update(result, extras)
    end

    private

    # Builds the tweet text and posts it. Joining the parts avoids the
    # trailing space that string interpolation produced when there were no
    # extras.
    def update(args, extras)
      msg = [args[:title], args[:url], *Array(extras)].join(' ')
      @client.update(msg)
    end

    # Lazily built Medium service.
    def medium
      @medium ||= Medium.new
    end

    # Lazily built Youtube service using the key given to #initialize.
    def youtube
      @youtube ||= Youtube.new @youtube_api_key
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8

# Make lib/ loadable so the version constant can be read while building.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'tweet_manager/version'

Gem::Specification.new do |spec|
  spec.name          = 'tweet_manager'
  spec.version       = TweetManager::VERSION
  spec.authors       = ['jotase']
  spec.email         = ['jota.segovia@gmail.com']

  spec.summary       = 'Twitter bot that tweet content from different sources'
  spec.description   = 'Twitter bot that tweet content from different sources, ATM Medium posts and youtube videos'
  spec.homepage      = 'https://github.com/5rabbits/tweet_manager'
  spec.license       = 'MIT'

  # Package every tracked file except the test directories and the local
  # debugger history, which is development residue and should not be shipped.
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/}) || f == '.byebug_history'
  end
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 1.11'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
  spec.add_development_dependency 'byebug'

  spec.add_dependency 'mechanize'
  # lib/tweet_manager.rb requires 'twitter', so it must be a runtime dependency.
  spec.add_dependency 'twitter'
  spec.add_dependency 'yt'
end
|
metadata
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tweet_manager
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- jotase
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-07-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.11'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: byebug
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: mechanize
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: yt
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Twitter bot that tweet content from different sources, ATM Medium posts
|
98
|
+
and youtube videos
|
99
|
+
email:
|
100
|
+
- jota.segovia@gmail.com
|
101
|
+
executables: []
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".byebug_history"
|
106
|
+
- ".gitignore"
|
107
|
+
- ".rspec"
|
108
|
+
- ".travis.yml"
|
109
|
+
- CODE_OF_CONDUCT.md
|
110
|
+
- Gemfile
|
111
|
+
- LICENSE.txt
|
112
|
+
- README.md
|
113
|
+
- Rakefile
|
114
|
+
- a.xml
|
115
|
+
- bin/console
|
116
|
+
- bin/setup
|
117
|
+
- example/test.rb
|
118
|
+
- lib/service/medium.rb
|
119
|
+
- lib/service/youtube.rb
|
120
|
+
- lib/tweet_manager.rb
|
121
|
+
- lib/tweet_manager/version.rb
|
122
|
+
- tweet_manager.gemspec
|
123
|
+
homepage: https://github.com/5rabbits/tweet_manager
|
124
|
+
licenses:
|
125
|
+
- MIT
|
126
|
+
metadata: {}
|
127
|
+
post_install_message:
|
128
|
+
rdoc_options: []
|
129
|
+
require_paths:
|
130
|
+
- lib
|
131
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: '0'
|
136
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
requirements: []
|
142
|
+
rubyforge_project:
|
143
|
+
rubygems_version: 2.5.1
|
144
|
+
signing_key:
|
145
|
+
specification_version: 4
|
146
|
+
summary: Twitter bot that tweet content from different sources
|
147
|
+
test_files: []
|