wuclan 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.textile +20 -0
- data/README.textile +28 -0
- data/examples/analyze/strong_links/gen_multi_edge.rb +103 -0
- data/examples/analyze/strong_links/main.rb +51 -0
- data/examples/analyze/word_count/dump_schema.rb +13 -0
- data/examples/analyze/word_count/freq_user.rb +31 -0
- data/examples/analyze/word_count/freq_whole_corpus.rb +27 -0
- data/examples/analyze/word_count/word_count.pig +43 -0
- data/examples/analyze/word_count/word_count.rb +34 -0
- data/examples/lastfm/scrape/load_lastfm.rb +31 -0
- data/examples/lastfm/scrape/scrape_lastfm.rb +47 -0
- data/examples/lastfm/scrape/seed.tsv +147 -0
- data/examples/twitter/old/load_twitter_search_jobs.rb +157 -0
- data/examples/twitter/old/scrape_twitter_api.rb +104 -0
- data/examples/twitter/old/scrape_twitter_search.rb +57 -0
- data/examples/twitter/old/scrape_twitter_trending.rb +73 -0
- data/examples/twitter/parse/parse_twitter_requests.rb +81 -0
- data/examples/twitter/parse/parse_twitter_search_requests.rb +28 -0
- data/examples/twitter/scrape_twitter_api/scrape_twitter_api.rb +61 -0
- data/examples/twitter/scrape_twitter_api/seed.tsv +4 -0
- data/examples/twitter/scrape_twitter_api/start_cache_twitter.sh +2 -0
- data/examples/twitter/scrape_twitter_api/support/make_request_stats.rb +291 -0
- data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb +98 -0
- data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig +4 -0
- data/examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv +6 -0
- data/examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv +725 -0
- data/examples/twitter/scrape_twitter_hosebird/edamame-killall +4 -0
- data/examples/twitter/scrape_twitter_hosebird/foo.rb +19 -0
- data/examples/twitter/scrape_twitter_hosebird/ps_emulation.rb +111 -0
- data/examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb +110 -0
- data/examples/twitter/scrape_twitter_hosebird/test_spewer.rb +20 -0
- data/examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml +10 -0
- data/examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb +38 -0
- data/examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb +63 -0
- data/examples/twitter/scrape_twitter_search/scrape_twitter_search.rb +44 -0
- data/examples/twitter/scrape_twitter_search/twitter_search_daemons.god +25 -0
- data/lib/old/twitter_api.rb +88 -0
- data/lib/wuclan/delicious/delicious_html_request.rb +31 -0
- data/lib/wuclan/delicious/delicious_models.rb +26 -0
- data/lib/wuclan/delicious/delicious_request.rb +65 -0
- data/lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb +60 -0
- data/lib/wuclan/friendster.rb +7 -0
- data/lib/wuclan/lastfm/model/base.rb +49 -0
- data/lib/wuclan/lastfm/model/sample_responses.txt +16 -0
- data/lib/wuclan/lastfm/scrape/base.rb +195 -0
- data/lib/wuclan/lastfm/scrape/concrete.rb +143 -0
- data/lib/wuclan/lastfm/scrape/lastfm_job.rb +12 -0
- data/lib/wuclan/lastfm/scrape/lastfm_request_stream.rb +17 -0
- data/lib/wuclan/lastfm/scrape/recursive_requests.rb +154 -0
- data/lib/wuclan/lastfm/scrape.rb +12 -0
- data/lib/wuclan/lastfm.rb +7 -0
- data/lib/wuclan/metrics/user_graph_metrics.rb +99 -0
- data/lib/wuclan/metrics/user_metrics.rb +443 -0
- data/lib/wuclan/metrics/user_metrics_basic.rb +277 -0
- data/lib/wuclan/metrics/user_scraping_metrics.rb +64 -0
- data/lib/wuclan/metrics.rb +0 -0
- data/lib/wuclan/myspace.rb +21 -0
- data/lib/wuclan/open_social/model/base.rb +0 -0
- data/lib/wuclan/open_social/scrape/base.rb +111 -0
- data/lib/wuclan/open_social/scrape_request.rb +6 -0
- data/lib/wuclan/open_social.rb +0 -0
- data/lib/wuclan/rdf_output/relationship_rdf.rb +47 -0
- data/lib/wuclan/rdf_output/text_element_rdf.rb +64 -0
- data/lib/wuclan/rdf_output/tweet_rdf.rb +10 -0
- data/lib/wuclan/rdf_output/twitter_rdf.rb +84 -0
- data/lib/wuclan/rdf_output/twitter_user_rdf.rb +12 -0
- data/lib/wuclan/shorturl/shorturl_request.rb +271 -0
- data/lib/wuclan/twitter/api_response_examples.textile +300 -0
- data/lib/wuclan/twitter/model/base.rb +72 -0
- data/lib/wuclan/twitter/model/multi_edge.rb +31 -0
- data/lib/wuclan/twitter/model/relationship.rb +176 -0
- data/lib/wuclan/twitter/model/text_element/extract_info_tests.rb +83 -0
- data/lib/wuclan/twitter/model/text_element/grok_tweets.rb +96 -0
- data/lib/wuclan/twitter/model/text_element/more_regexes.rb +370 -0
- data/lib/wuclan/twitter/model/text_element.rb +38 -0
- data/lib/wuclan/twitter/model/tweet/tokenize.rb +38 -0
- data/lib/wuclan/twitter/model/tweet/tweet_regexes.rb +202 -0
- data/lib/wuclan/twitter/model/tweet/tweet_token.rb +79 -0
- data/lib/wuclan/twitter/model/tweet.rb +74 -0
- data/lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb +57 -0
- data/lib/wuclan/twitter/model/twitter_user.rb +145 -0
- data/lib/wuclan/twitter/model.rb +21 -0
- data/lib/wuclan/twitter/parse/ff_ids_parser.rb +27 -0
- data/lib/wuclan/twitter/parse/friends_followers_parser.rb +52 -0
- data/lib/wuclan/twitter/parse/generic_json_parser.rb +26 -0
- data/lib/wuclan/twitter/parse/json_tweet.rb +63 -0
- data/lib/wuclan/twitter/parse/json_twitter_user.rb +122 -0
- data/lib/wuclan/twitter/parse/public_timeline_parser.rb +54 -0
- data/lib/wuclan/twitter/parse/twitter_search_parse.rb +60 -0
- data/lib/wuclan/twitter/parse/user_parser.rb +30 -0
- data/lib/wuclan/twitter/scrape/base.rb +97 -0
- data/lib/wuclan/twitter/scrape/old_skool_request_classes.rb +40 -0
- data/lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb +31 -0
- data/lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb +75 -0
- data/lib/wuclan/twitter/scrape/twitter_followers_request.rb +135 -0
- data/lib/wuclan/twitter/scrape/twitter_json_response.rb +124 -0
- data/lib/wuclan/twitter/scrape/twitter_request_stream.rb +44 -0
- data/lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb +44 -0
- data/lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb +30 -0
- data/lib/wuclan/twitter/scrape/twitter_search_job.rb +25 -0
- data/lib/wuclan/twitter/scrape/twitter_search_request.rb +70 -0
- data/lib/wuclan/twitter/scrape/twitter_search_request_stream.rb +19 -0
- data/lib/wuclan/twitter/scrape/twitter_timeline_request.rb +72 -0
- data/lib/wuclan/twitter/scrape/twitter_user_request.rb +64 -0
- data/lib/wuclan/twitter/scrape.rb +27 -0
- data/lib/wuclan/twitter.rb +7 -0
- data/lib/wuclan.rb +1 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/wuclan_spec.rb +7 -0
- data/wuclan.gemspec +184 -0
- metadata +219 -0
@@ -0,0 +1,370 @@
|
|
1
|
+
|
2
|
+
# http://github.com/Empact/html_test/tree/master
|
3
|
+
# http://github.com/michaeledgar/validates_not_profane
|
4
|
+
#
|
5
|
+
# http://github.com/porras/livevalidation/tree/master
|
6
|
+
# Rails plugin which allows automatic integration of your Rails application with Javascript library LiveValidation. This library implements client-side form validation and you can
|
7
|
+
#
|
8
|
+
# http://github.com/cainlevy/semantic-attributes
|
9
|
+
#
|
10
|
+
# git://github.com/alexdunae/validates_email_format_of.git
|
11
|
+
# Validate e-mail addresses against RFC 2822 and RFC 3696 with this popular Ruby on Rails plugin and gem.
|
12
|
+
#
|
13
|
+
# http://github.com/freelancing-god/active-matchers/tree/master
|
14
|
+
# Helpful rspec matchers for testing validations and associations.
|
15
|
+
#
|
16
|
+
# http://github.com/redinger/validation_reflection/tree/master
|
17
|
+
# refl = Person.reflect_on_validations_for(:name)
|
18
|
+
# refl[0].macro
|
19
|
+
# => :validates_presence_of
|
20
|
+
#
|
21
|
+
# http://github.com/augustl/live-validations/tree/master
|
22
|
+
# Reads Active Record's validations and makes them available to live client side javascript validation scripts
|
23
|
+
#
|
24
|
+
# http://github.com/adzap/validates_timeliness/tree/master
|
25
|
+
# Date and time validation plugin for Rails 2.x and allows custom date/time formats
|
26
|
+
|
27
|
+
# http://github.com/matthewrudy/regexpert/tree/master
|
28
|
+
# Description: A collection of common Regexps for Ruby. Validation for emails, uk postcode, etc.
|
29
|
+
#
|
30
|
+
|
31
|
+
# http://plugins.jquery.com/project/validate
|
32
|
+
#
|
33
|
+
#
|
34
|
+
|
35
|
+
# ===========================================================================
|
36
|
+
#
|
37
|
+
# # http://github.com/matthewrudy/regexpert/blob/master/lib/regexpert.rb
|
38
|
+
#
|
39
|
+
# module Format
|
40
|
+
# # This is taken from dm-more - http://github.com/sam/dm-more/tree/master/dm-validations/lib/dm-validations/formats/email.rb
|
41
|
+
# # RFC2822 (No attribution reference available)
|
42
|
+
# #
|
43
|
+
# # doctest: email_address
|
44
|
+
# # >> "MatthewRudyJacobs@gmail.com" =~ Regexpert::Format::EmailAddress
|
45
|
+
# # => 0
|
46
|
+
# #
|
47
|
+
# # >> "dev@" =~ Regexpert::Format::EmailAddress
|
48
|
+
# # => nil
|
49
|
+
# #
|
50
|
+
# EmailAddress = begin
|
51
|
+
# alpha = "a-zA-Z"
|
52
|
+
# digit = "0-9"
|
53
|
+
# atext = "[#{alpha}#{digit}\!\#\$\%\&\'\*+\/\=\?\^\_\`\{\|\}\~\-]"
|
54
|
+
# dot_atom_text = "#{atext}+([.]#{atext}*)*"
|
55
|
+
# dot_atom = "#{dot_atom_text}"
|
56
|
+
# qtext = '[^\\x0d\\x22\\x5c\\x80-\\xff]'
|
57
|
+
# text = "[\\x01-\\x09\\x11\\x12\\x14-\\x7f]"
|
58
|
+
# quoted_pair = "(\\x5c#{text})"
|
59
|
+
# qcontent = "(?:#{qtext}|#{quoted_pair})"
|
60
|
+
# quoted_string = "[\"]#{qcontent}+[\"]"
|
61
|
+
# atom = "#{atext}+"
|
62
|
+
# word = "(?:#{atom}|#{quoted_string})"
|
63
|
+
# obs_local_part = "#{word}([.]#{word})*"
|
64
|
+
# local_part = "(?:#{dot_atom}|#{quoted_string}|#{obs_local_part})"
|
65
|
+
# no_ws_ctl = "\\x01-\\x08\\x11\\x12\\x14-\\x1f\\x7f"
|
66
|
+
# dtext = "[#{no_ws_ctl}\\x21-\\x5a\\x5e-\\x7e]"
|
67
|
+
# dcontent = "(?:#{dtext}|#{quoted_pair})"
|
68
|
+
# domain_literal = "\\[#{dcontent}+\\]"
|
69
|
+
# obs_domain = "#{atom}([.]#{atom})*"
|
70
|
+
# domain = "(?:#{dot_atom}|#{domain_literal}|#{obs_domain})"
|
71
|
+
# addr_spec = "#{local_part}\@#{domain}"
|
72
|
+
# pattern = /^#{addr_spec}$/
|
73
|
+
# end
|
74
|
+
#
|
75
|
+
# # This is taken from dm-more http://github.com/sam/dm-more/tree/master/dm-validations/lib/dm-validations/formats/url.rb
|
76
|
+
# # Regex from http://www.igvita.com/2006/09/07/validating-url-in-ruby-on-rails/
|
77
|
+
# #
|
78
|
+
# # doctest: url # examples from Rails auto_link tests
|
79
|
+
# # >> "http://www.rubyonrails.com/contact;new" =~ Regexpert::Format::Url
|
80
|
+
# # => 0
|
81
|
+
# # >> "http://maps.google.co.uk/maps?f=q&q=the+london+eye&ie=UTF8&ll=51.503373,-0.11939&spn=0.007052,0.012767&z=16&iwloc=A" =~ Regexpert::Format::Url
|
82
|
+
# # => 0
|
83
|
+
# # >> "http://en.wikipedia.org/wiki/Sprite_(computer_graphics)" =~ Regexpert::Format::Url
|
84
|
+
# # => 0
|
85
|
+
# # TODO: think of a good example of a bad url
|
86
|
+
# Url = begin
|
87
|
+
# /(^$)|(^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$)/ix
|
88
|
+
# end
|
89
|
+
#
|
90
|
+
# # This is taken from Django.Contrib.Localflavor.uk
|
91
|
+
# # The regular expression used is sourced from the schema for British Standard
|
92
|
+
# # BS7666 address types: http://www.govtalk.gov.uk/gdsc/schemas/bs7666-v2-0.xsd
|
93
|
+
# #
|
94
|
+
# # doctest: ukpostcode
|
95
|
+
# # >> "GIR 0AA" =~ Regexpert::Format::UKPostcode # GIR 0AA is a special GIRO postcode
|
96
|
+
# # => 0
|
97
|
+
# # >> "AL40XB" =~ Regexpert::Format::UKPostcode
|
98
|
+
# # => 0
|
99
|
+
# # >> "CB4 1TL" =~ Regexpert::Format::UKPostcode
|
100
|
+
# # => 0
|
101
|
+
# #
|
102
|
+
# # >> "AL44 NOP" =~ Regexpert::Format::UKPostcode
|
103
|
+
# # => nil
|
104
|
+
# # >> "CB4-1TL" =~ Regexpert::Format::UKPostcode
|
105
|
+
# # => nil
|
106
|
+
# #
|
107
|
+
# UKPostcode = begin
|
108
|
+
# outcode_pattern = '[A-PR-UWYZ]([0-9]{1,2}|([A-HIK-Y][0-9](|[0-9]|[ABEHMNPRVWXY]))|[0-9][A-HJKSTUW])'
|
109
|
+
# incode_pattern = '[0-9][ABD-HJLNP-UW-Z]{2}'
|
110
|
+
# postcode_regex = Regexp.new("^(GIR *0AA|#{outcode_pattern} *#{incode_pattern})$", Regexp::IGNORECASE)
|
111
|
+
# end
|
112
|
+
|
113
|
+
|
114
|
+
# ===========================================================================
|
115
|
+
#
|
116
|
+
# http://www.botvector.net/2008/05/regular-expression-samples.html
|
117
|
+
#
|
118
|
+
#
|
119
|
+
# //Address: State code (US)
|
120
|
+
# '/\\b(?:A[KLRZ]|C[AOT]|D[CE]|FL|GA|HI|I[ADLN]|K[SY]|LA|M[ADEINOST]|N[CDEHJMVY]|O[HKR]|PA|RI|S[CD]|T[NX]|UT|V[AT]|W[AIVY])\\b/'
|
121
|
+
#
|
122
|
+
# //Address: ZIP code (US)
|
123
|
+
# '\b[0-9]{5}(?:-[0-9]{4})?\b'
|
124
|
+
#
|
125
|
+
# //Credit card: All major cards
|
126
|
+
# '^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|6011[0-9]{12}|3(?:0[0-5]|[68][0-9])[0-9]{11}|3[47][0-9]{13})$'
|
127
|
+
#
|
128
|
+
# //Credit card: American Express
|
129
|
+
# '^3[47][0-9]{13}$'
|
130
|
+
#
|
131
|
+
# //Credit card: Diners Club
|
132
|
+
# '^3(?:0[0-5]|[68][0-9])[0-9]{11}$'
|
133
|
+
#
|
134
|
+
# //Credit card: Discover
|
135
|
+
# '^6011[0-9]{12}$'
|
136
|
+
#
|
137
|
+
# //Credit card: MasterCard
|
138
|
+
# '^5[1-5][0-9]{14}$'
|
139
|
+
#
|
140
|
+
# //Credit card: Visa
|
141
|
+
# '^4[0-9]{12}(?:[0-9]{3})?$'
|
142
|
+
#
|
143
|
+
# //Credit card: remove non-digits
|
144
|
+
# '/[^0-9]+/'
|
145
|
+
#
|
146
|
+
# //Date d/m/yy and dd/mm/yyyy
|
147
|
+
# //1/1/00 through 31/12/99 and 01/01/1900 through 31/12/2099
|
148
|
+
# //Matches invalid dates such as February 31st
|
149
|
+
# '\b(0?[1-9]|[12][0-9]|3[01])[- /.](0?[1-9]|1[012])[- /.](19|20)?[0-9]{2}\b'
|
150
|
+
#
|
151
|
+
# //Date dd/mm/yyyy
|
152
|
+
# //01/01/1900 through 31/12/2099
|
153
|
+
# //Matches invalid dates such as February 31st
|
154
|
+
# '(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.](19|20)[0-9]{2}'
|
155
|
+
#
|
156
|
+
# //Date m/d/y and mm/dd/yyyy
|
157
|
+
# //1/1/99 through 12/31/99 and 01/01/1900 through 12/31/2099
|
158
|
+
# //Matches invalid dates such as February 31st
|
159
|
+
# //Accepts dashes, spaces, forward slashes and dots as date separators
|
160
|
+
# '\b(0?[1-9]|1[012])[- /.](0?[1-9]|[12][0-9]|3[01])[- /.](19|20)?[0-9]{2}\b'
|
161
|
+
#
|
162
|
+
# //Date mm/dd/yyyy
|
163
|
+
# //01/01/1900 through 12/31/2099
|
164
|
+
# //Matches invalid dates such as February 31st
|
165
|
+
# '(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)[0-9]{2}'
|
166
|
+
#
|
167
|
+
# //Date yy-m-d or yyyy-mm-dd
|
168
|
+
# //00-1-1 through 99-12-31 and 1900-01-01 through 2099-12-31
|
169
|
+
# //Matches invalid dates such as February 31st
|
170
|
+
# '\b(19|20)?[0-9]{2}[- /.](0?[1-9]|1[012])[- /.](0?[1-9]|[12][0-9]|3[01])\b'
|
171
|
+
#
|
172
|
+
# //Date yyyy-mm-dd
|
173
|
+
# //1900-01-01 through 2099-12-31
|
174
|
+
# //Matches invalid dates such as February 31st
|
175
|
+
# '(19|20)[0-9]{2}[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])'
|
176
|
+
#
|
177
|
+
#
|
178
|
+
# //IP address
|
179
|
+
# //Matches 0.0.0.0 through 999.999.999.999
|
180
|
+
# //Use this fast and simple regex if you know the data does not contain invalid IP addresses.
|
181
|
+
# '\b([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\b'
|
182
|
+
#
|
183
|
+
# //IP address
|
184
|
+
# //Matches 0.0.0.0 through 999.999.999.999
|
185
|
+
# //Use this fast and simple regex if you know the data does not contain invalid IP addresses,
|
186
|
+
# //and you don't need access to the individual IP numbers.
|
187
|
+
# '\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b'
|
188
|
+
#
|
189
|
+
# //IP address
|
190
|
+
# //Matches 0.0.0.0 through 255.255.255.255
|
191
|
+
# //Use this regex to match IP numbers with accuracy, without access to the individual IP numbers.
|
192
|
+
# '\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
|
193
|
+
#
|
194
|
+
# //IP address
|
195
|
+
# //Matches 0.0.0.0 through 255.255.255.255
|
196
|
+
# //Use this regex to match IP numbers with accuracy.
|
197
|
+
# //Each of the 4 numbers is stored into a capturing group, so you can access them for further processing.
|
198
|
+
# '\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
|
199
|
+
#
|
200
|
+
#
|
201
|
+
# //Number: Currency amount
|
202
|
+
# //Optional thousands separators; optional two-digit fraction
|
203
|
+
# '\b[0-9]{1,3}(?:,?[0-9]{3})*(?:\.[0-9]{2})?\b'
|
204
|
+
#
|
205
|
+
# //Number: Currency amount
|
206
|
+
# //Optional thousands separators; mandatory two-digit fraction
|
207
|
+
# '\b[0-9]{1,3}(?:,?[0-9]{3})*\.[0-9]{2}\b'
|
208
|
+
#
|
209
|
+
# //Number: floating point
|
210
|
+
# //Matches an integer or a floating point number with mandatory integer part. The sign is optional.
|
211
|
+
# '[-+]?\b[0-9]+(\.[0-9]+)?\b'
|
212
|
+
#
|
213
|
+
# //Number: floating point
|
214
|
+
# //Matches an integer or a floating point number with optional integer part. The sign is optional.
|
215
|
+
# '[-+]?\b[0-9]*\.?[0-9]+\b'
|
216
|
+
#
|
217
|
+
# //Number: hexadecimal (C-style)
|
218
|
+
# '\b0[xX][0-9a-fA-F]+\b'
|
219
|
+
#
|
220
|
+
# //Number: Insert thousands separators
|
221
|
+
# //Replaces 123456789.00 with 123,456,789.00
|
222
|
+
# '(?<=[0-9])(?=(?:[0-9]{3})+(?![0-9]))' //Number: integer //Will match 123 and 456 as separate integer numbers in 123.456 '\b\d+\b' //Number: integer //Does not match numbers like 123.456 '(?
|
223
|
+
#
|
224
|
+
# Passwords
|
225
|
+
#
|
226
|
+
#
|
227
|
+
# //Password complexity
|
228
|
+
# //Tests if the input consists of 6 or more letters, digits, underscores and hyphens.
|
229
|
+
# //The input must contain at least one upper case letter, one lower case letter and one digit.
|
230
|
+
# '\A(?=[-_a-zA-Z0-9]*?[A-Z])(?=[-_a-zA-Z0-9]*?[a-z])(?=[-_a-zA-Z0-9]*?[0-9])[-_a-zA-Z0-9]{6,}\z'
|
231
|
+
#
|
232
|
+
# //Password complexity
|
233
|
+
# //Tests if the input consists of 6 or more characters.
|
234
|
+
# //The input must contain at least one upper case letter, one lower case letter and one digit.
|
235
|
+
# '\A(?=[-_a-zA-Z0-9]*?[A-Z])(?=[-_a-zA-Z0-9]*?[a-z])(?=[-_a-zA-Z0-9]*?[0-9])\S{6,}\z'
|
236
|
+
#
|
237
|
+
# //Path: Windows
|
238
|
+
# '\b[a-z]:\\[^/:*?"<>|\r\n]*'
|
239
|
+
#
|
240
|
+
# //Path: Windows
|
241
|
+
# //Different elements of the path are captured into backreferences.
|
242
|
+
# '\b((?#drive)[a-z]):\\((?#folder)[^/:*?"<>|\r\n]*\\)?((?#file)[^\\/:*?"<>|\r\n]*)'
|
243
|
+
#
|
244
|
+
# //Path: Windows or UNC
|
245
|
+
# '(?:(?#drive)\b[a-z]:|\\\\[a-z0-9]+)\\[^/:*?"<>|\r\n]*'
|
246
|
+
#
|
247
|
+
# //Path: Windows or UNC
|
248
|
+
# //Different elements of the path are captured into backreferences.
|
249
|
+
# '((?#drive)\b[a-z]:|\\\\[a-z0-9]+)\\((?#folder)[^/:*?"<>|\r\n]*\\)?((?#file)[^\\/:*?"<>|\r\n]*)'
|
250
|
+
|
251
|
+
# //Phone Number (North America)
|
252
|
+
# //Matches 3334445555, 333.444.5555, 333-444-5555, 333 444 5555, (333) 444 5555 and all combinations thereof.
|
253
|
+
# //Replaces all those with (333) 444-5555
|
254
|
+
# preg_replace('\(?([0-9]{3})\)?[-. ]?([0-9]{3})[-. ]?([0-9]{4})', '(\1) \2-\3', $text);
|
255
|
+
#
|
256
|
+
# //Phone Number (North America)
|
257
|
+
# //Matches 3334445555, 333.444.5555, 333-444-5555, 333 444 5555, (333) 444 5555 and all combinations thereof.
|
258
|
+
# '\(?[0-9]{3}\)?[-. ]?[0-9]{3}[-. ]?[0-9]{4}'
|
259
|
+
|
260
|
+
|
261
|
+
# Postal codes
|
262
|
+
#
|
263
|
+
#
|
264
|
+
# //Postal code (Canada)
|
265
|
+
# '\b[ABCEGHJKLMNPRSTVXY][0-9][A-Z] [0-9][A-Z][0-9]\b'
|
266
|
+
#
|
267
|
+
# //Postal code (UK)
|
268
|
+
# '\b[A-Z]{1,2}[0-9][A-Z0-9]? [0-9][ABD-HJLNP-UW-Z]{2}\b'
|
269
|
+
#
|
270
|
+
|
271
|
+
#
|
272
|
+
# Programming
|
273
|
+
#
|
274
|
+
# //Programming: GUID
|
275
|
+
# //Microsoft-style GUID, numbers only.
|
276
|
+
# '[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}'
|
277
|
+
#
|
278
|
+
# //Programming: GUID
|
279
|
+
# //Microsoft-style GUID, with optional parentheses or braces.
|
280
|
+
# //(Long version, if your regex flavor doesn't support conditionals.)
|
281
|
+
# '[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}|\([A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}\)|\{[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}\}'
|
282
|
+
#
|
283
|
+
# //Programming: GUID
|
284
|
+
# //Microsoft-style GUID, with optional parentheses or braces.
|
285
|
+
# //Short version, illustrating the use of regex conditionals. Not all regex flavors support conditionals.
|
286
|
+
# //Also, when applied to large chunks of data, the regex using conditionals will likely be slower
|
287
|
+
# //than the long version. Straight alternation is much easier to optimize for a regex engine.
|
288
|
+
# '(?:(\()|(\{))?[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}(?(1)\))(?(2)\})'
|
289
|
+
#
|
290
|
+
# //Programming: Remove escapes
|
291
|
+
# //Remove backslashes used to escape other characters
|
292
|
+
# preg_replace('\\(.)', '\1', $text);
|
293
|
+
#
|
294
|
+
# //Programming: String
|
295
|
+
# //Quotes may appear in the string when escaped with a backslash.
|
296
|
+
# //The string may span multiple lines.
|
297
|
+
# '"[^"\\]*(?:\\.[^"\\]*)*"'
|
298
|
+
|
299
|
+
#
|
300
|
+
# Escape
|
301
|
+
#
|
302
|
+
# //Regex: Escape metacharacters
|
303
|
+
# //Place a backslash in front of the regular expression metacharacters
|
304
|
+
# gsub("[][{}()*+?.\\^$|]", "\\$0", $text);
|
305
|
+
|
306
|
+
|
307
|
+
|
308
|
+
# 3530588 3.4G /workspace/data lab13
|
309
|
+
# 2242028 2.2G /workspace/data lab17
|
310
|
+
# 3530588 3.4G /workspace/data lab16
|
311
|
+
# 3530588 3.4G /workspace/data lab21
|
312
|
+
# 3530588 3.4G /workspace/data lab14
|
313
|
+
# 4 4.0K /workspace/data lab12
|
314
|
+
# 3530588 3.4G /workspace/data lab15
|
315
|
+
# 20 20K /workspace/data lab23
|
316
|
+
|
317
|
+
|
318
|
+
|
319
|
+
# Security
|
320
|
+
#
|
321
|
+
#
|
322
|
+
# //Security: ASCII code characters excl. tab and CRLF
|
323
|
+
# //Matches any single non-printable code character that may cause trouble in certain situations.
|
324
|
+
# //Excludes tabs and line breaks.
|
325
|
+
# '[\x00\x08\x0B\x0C\x0E-\x1F]'
|
326
|
+
#
|
327
|
+
# //Security: ASCII code characters incl. tab and CRLF
|
328
|
+
# //Matches any single non-printable code character that may cause trouble in certain situations.
|
329
|
+
# //Includes tabs and line breaks.
|
330
|
+
# '[\x00-\x1F]'
|
331
|
+
#
|
332
|
+
# //Security: Escape quotes and backslashes
|
333
|
+
# //E.g. escape user input before inserting it into a SQL statement
|
334
|
+
# gsub("\\$0", "\\$0", $text);
|
335
|
+
#
|
336
|
+
# //Security: Unicode code and unassigned characters excl. tab and CRLF
|
337
|
+
# //Matches any single non-printable code character that may cause trouble in certain situations.
|
338
|
+
# //Also matches any Unicode code point that is unused in the current Unicode standard,
|
339
|
+
# //and thus should not occur in text as it cannot be displayed.
|
340
|
+
# //Excludes tabs and line breaks.
|
341
|
+
# '[^\P{C}\t\r\n]'
|
342
|
+
#
|
343
|
+
# //Security: Unicode code and unassigned characters incl. tab and CRLF
|
344
|
+
# //Matches any single non-printable code character that may cause trouble in certain situations.
|
345
|
+
# //Also matches any Unicode code point that is unused in the current Unicode standard,
|
346
|
+
# //and thus should not occur in text as it cannot be displayed.
|
347
|
+
# //Includes tabs and line breaks.
|
348
|
+
# '\p{C}'
|
349
|
+
#
|
350
|
+
# //Security: Unicode code characters excl. tab and CRLF
|
351
|
+
# //Matches any single non-printable code character that may cause trouble in certain situations.
|
352
|
+
# //Excludes tabs and line breaks.
|
353
|
+
# '[^\P{Cc}\t\r\n]'
|
354
|
+
#
|
355
|
+
# //Security: Unicode code characters incl. tab and CRLF
|
356
|
+
# //Matches any single non-printable code character that may cause trouble in certain situations.
|
357
|
+
# //Includes tabs and line breaks.
|
358
|
+
# '\p{Cc}'
|
359
|
+
#
|
360
|
+
#
|
361
|
+
#
|
362
|
+
# SSN (Social security numbers)
|
363
|
+
#
|
364
|
+
#
|
365
|
+
# //Social security number (US)
|
366
|
+
# '\b[0-9]{3}-[0-9]{2}-[0-9]{4}\b'
|
367
|
+
|
368
|
+
|
369
|
+
|
370
|
+
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Wuclan::Models
|
2
|
+
|
3
|
+
#
|
4
|
+
#
|
5
|
+
#
|
6
|
+
#
# Mixin shared by the text-element models defined in this file.
#
module TextElementCommon
  # Number of leading fields that make up the key.
  # Key on text-status_id, hence two fields.
  def num_key_fields
    2
  end
end
|
10
|
+
|
11
|
+
#
|
12
|
+
# Topical #hashtags extracted from tweet text
|
13
|
+
#
|
14
|
+
# the twitter_user_id is denormalized
|
15
|
+
# but is often what we want: saves a join
|
16
|
+
#
|
17
|
+
#
# One #hashtag occurrence, stored as (hashtag, status_id, twitter_user_id).
#
class Hashtag < TypedStruct.new(
    [:hashtag,         String],
    [:status_id,       Integer],
    [:twitter_user_id, Integer]
  )
  include ModelCommon
  include TextElementCommon

  # +text+ is a second name for the +hashtag+ field.
  alias_method :text, :hashtag

  # Names of the fields that hold numeric ids.
  def numeric_id_fields
    [:twitter_user_id, :status_id]
  end
end
|
27
|
+
|
28
|
+
#
# One URL occurrence, stored as (tweet_url, status_id, twitter_user_id).
#
class TweetUrl < TypedStruct.new(
    [:tweet_url,       String],
    [:status_id,       Integer],
    [:twitter_user_id, Integer]
  )
  include ModelCommon
  include TextElementCommon

  # +text+ is a second name for the +tweet_url+ field.
  alias_method :text, :tweet_url

  # Names of the fields that hold numeric ids.
  def numeric_id_fields
    [:twitter_user_id, :status_id]
  end
end
|
38
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'wuclan/models/tweet/tweet_token'
|
2
|
+
require 'wukong/encoding'
|
3
|
+
module Wuclan::Models
|
4
|
+
Tweet.class_eval do
|
5
|
+
#
# Returns the text to run the token extractors over, or nil when the tweet
# should be skipped.
#
# Skipped (nil) when the raw +text+
# * contains more than 20 '&' characters, or
# * contains the phrase "just setting up my twttr".
#
# Otherwise returns +decoded_text+ with every run of whitespace collapsed to
# a single space and leading/trailing whitespace removed.
#
def string_for_tokenizing
  # simpleminded test for non-latin script: don't bother if > 20 entities
  return if text.count('&') > 20
  # skip default message from early days
  return if text =~ /just setting up my twttr/
  # Flatten whitespace. The pattern carries no trailing `s` option on purpose:
  # in Ruby that option is not "dot matches newline" but selects the
  # Windows-31J regexp encoding, which makes the match raise
  # Encoding::CompatibilityError on UTF-8 text containing non-ASCII
  # characters. \s already matches newlines without any option.
  self.decoded_text.gsub(/\s+/, ' ').strip
end
|
13
|
+
|
14
|
+
#
# Builds one +klass+ instance for each item that klass.extract_tokens!(str)
# returns. Each instance is constructed as
#   klass.new(item, twitter_user_id, id, 1)
#
# NOTE(review): extract_tokens! is defined elsewhere; the bang suggests it
# modifies +str+ in place -- confirm against the token classes.
#
def tokens_for klass, str
  extracted = klass.extract_tokens!(str)
  extracted.map { |item| klass.new(item, twitter_user_id, id, 1) }
end
|
19
|
+
|
20
|
+
#
# Extracts tokens from this tweet and returns them as one array.
#
# Order of work:
# 1. SmilieToken and UrlToken run against the text as-is (case-sensitive).
# 2. The working string is downcased in place.
# 3. RtToken, AtsignToken and HashtagToken run against the downcased text.
# 4. WordToken runs last, and only when +extract_word_tokens+ is truthy.
#
# Returns [] when string_for_tokenizing yields nothing usable.
#
def tokenize extract_word_tokens=nil
  str = string_for_tokenizing
  return [] if str.blank?
  found = []
  # Case-sensitive tokens
  [ SmilieToken, UrlToken ].each { |klass| found.concat(tokens_for(klass, str)) }
  # Case-insensitive tokens
  str.downcase!
  [ RtToken, AtsignToken, HashtagToken ].each { |klass| found.concat(tokens_for(klass, str)) }
  found.concat(tokens_for(WordToken, str)) if extract_word_tokens
  found
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,202 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
module Wuclan
  module Models
    #
    # Regular expressions for pulling URLs, hashtags, retweets, @-mentions and
    # smilies out of tweet text.
    #
    module TweetRegexes
      # ===========================================================================
      #
      # Twitter accepts URLs somewhat idiosyncratically, probably for good reason --
      # we rarely see ()![] in urls; more likely in a status they are punctuation.
      #
      # This is what I've reverse engineered.
      #
      #
      # Notes:
      #
      # * is.gd uses a trailing '-' (to indicate 'preview mode'): clever.
      # * pastoid.com uses a trailing '+', and idek.net a trailing ~ for no reason. annoying.
      #
      # Counterexamples:
      # * http://www.5irecipe.cn/recipe_content/2307/'/
      # * http://www.facebook.com/groups.php?id=1347199977&gv=12#/group.php?gid=18183539495
      #
      RE_DOMAIN_HEAD       = '(?:[a-zA-Z0-9\-]+\.)+'
      RE_DOMAIN_TLD        = '(?:com|org|net|edu|gov|mil|biz|info|mobi|name|aero|jobs|museum|[a-zA-Z]{2})'
      # RE_URL_SCHEME      = '[a-zA-Z][a-zA-Z0-9\-\+\.]+'
      RE_URL_SCHEME_STRICT = '[a-zA-Z]{3,6}'
      RE_URL_UNRESERVED    = 'a-zA-Z0-9' + '\-\._~'
      RE_URL_OKCHARS       = RE_URL_UNRESERVED + '\'\+\,\;=' + '/%:@' # not !$&()* [] \|
      RE_URL_QUERYCHARS    = RE_URL_OKCHARS + '&='
      RE_URL_HOSTPART      = "#{RE_URL_SCHEME_STRICT}://#{RE_DOMAIN_HEAD}#{RE_DOMAIN_TLD}"
      # Capture group 1 is the whole URL.
      RE_URL               = %r{(
        #{RE_URL_HOSTPART} # Host
        (?:(?: \/ [#{RE_URL_OKCHARS}]+? )*? # path: / delimited path segments
        (?: \/ [#{RE_URL_OKCHARS}]*[\w\-\+\~] ) # where the last one ends in a non-punctuation.
        | # ... or no path segment
        )\/? # with an optional trailing slash
        (?: \? [#{RE_URL_QUERYCHARS}]+ )? # query: introduced by a ?, with &foo= delimited segments
        (?: \# [#{RE_URL_OKCHARS}]+ )? # frag: introduced by a #
      )}x


      #
      # Technically a scheme can allow the characters '+', '-' and '.' within
      # it. In practice you can not only ignore those characters but all but a
      # few specific schemes.
      #
      # From a collection of ~9M tweeted urls, 99.4% were http://, with only the additional
      # https, mms, ftp, git, irc, feed, itpc, rtsp, hxxp, gopher, telnet, itms, ssh, webcal, svn
      # seemingly worth finding:
      #
      # 8925742 http
      # 6026 https 1841 ivo 122 mms 85 ftp 61 git 53 irc 45 feed 31 itpc 12 www
      # 12 rtsp 12 hxxp 12 gopher 9 telnet 9 itms 7 ssh 5 webcal 5 sop 4 wiie
      # 3 svn 3 sssp 3 file 2 res 1 xttp 1 xmlrpc 1 ssl 1 smb
      #
      # An hxxp http://en.wikipedia.org/wiki/Hxxp is used to obscure a link, so
      # make of that what you will.
      #
      # The ivo:// scheme is used by virtual astronomical observatories; as its
      # hostnames are given in reverse-dotted notation (uk.org.estar) these URIs
      # are imperfectly recognized. Twitter doesn't accept them at all:
      # http://twitter.com/eSTAR_Project/status/1113930948
      #
      #


      # ===========================================================================
      #
      # A hash following a non-alphanum_ (or at the start of the line),
      # followed by any number of (alpha, num, -_.+:=), and ending in an alphanum_
      #
      # This is overly generous to those dorky triple tags (geo:lat=69.3), but we'll soldier on somehow.
      #
      # Capture group 1 is the tag without its leading '#'.
      #
      RE_HASHTAGS = %r{(?:^|\W)\#([a-zA-Z0-9\-_\.+:=]+\w)(?:\W|$)}

      # ===========================================================================
      #
      # Retweets and Retweet Whores
      #
      # See ARetweetsB for more info.
      #
      # A retweet
      # RT @interesting_user Something so witty Dorothy Parker would just give up
      # Oh yeah and so's your mom (via @sixth_grader)
      # retweeting @ogre: KEGGER TONITE RT pls
      # ^^^ this is not a rtwhore; it matches first as a retweet
      #
      # and rtwhores
      # retweet please: Hey here's something I'm whoring xxx
      # KEGGER TONITE RT pls
      #
      # or semantically-incorrect matches such as (actual example):
      # @somebody lol, love the 'please retweet' ending!
      #
      # Things that don't match:
      # retweet is silly, @i_think_youre_dumb
      # misspell the name of my Sony Via
      #
      RE_RETWEET_WORDS  = 'rt|retweet|retweeting'
      RE_RETWEET_ONLY   = %r{(?:#{RE_RETWEET_WORDS})}
      RE_RETWEET_OR_VIA = %r{(?:#{RE_RETWEET_WORDS}|via|from)}
      RE_PLEASE         = %r{(?:please|plz|pls)}
      #
      # The keyword patterns are spliced in through Regexp#source, not by
      # interpolating the Regexp objects themselves. Interpolating a Regexp
      # inserts its #to_s form, "(?-mix:...)", which switches the enclosing
      # pattern's i option back off for the keywords -- so "RT @user" would
      # not match even though the pattern is declared case-insensitive.
      #
      # Capture group 1 of RE_RETWEET is the retweeted user's screen name.
      #
      RE_RETWEET        = %r{\b#{RE_RETWEET_OR_VIA.source}\W*@(\w+)\b}i
      RE_RTWHORE        = %r{
          \b#{RE_RETWEET_ONLY.source}\W*#{RE_PLEASE.source}\b
        | \b#{RE_PLEASE.source}\W*#{RE_RETWEET_ONLY.source}\b}ix

      # ===========================================================================
      #
      # An @-mention: the '@' follows either the start of the line or a
      # non-alphanum_ character, and is followed by a string of [a-zA-Z0-9_]
      #
      # Note carefully: we _demand_ a preceding character (or start of line):
      # \b would match email@address.com, which we don't want.
      #
      # Making an exception for RT@im_cramped_for_space.
      #
      # All retweets
      #
      # This pattern has no i option, so the retweet/via prefix exception only
      # matches lower-case keywords.
      #
      # Capture group 1 is the screen name without its leading '@'.
      #
      RE_ATSIGNS = %r{(?:^|\W|#{RE_RETWEET_OR_VIA})@(\w+)\b}



      # ===========================================================================
      #
      # Numbers (unused drafts, kept for reference)
      #

      # RE_NUMBERS = %r{
      # (?:^|\D) # non-number
      # (
      # |(?:\(\d{3}\)[\ \-]?\d{3}[\ \-]\d{4})
      # |(?: (?:\d{1,3}\.)(?:\d{3},)*\.?\d+) # decimal number
      # |(?: (?:\d{1,3}\.)(?:\d{3}\.)*,?\d+) # euro-style
      # \d+
      # )
      # }x
      #
      # # IP address
      # \b(?:\d{1,3}\.){3}\d{1,3}\b
      # credit card: (lax)
      # \b(?:\d[ -]*){13,16}\b
      # \b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|6(?:011|5[0-9][0-9])[0-9]{12}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|(?:2131|1800|35\d{3})\d{11})\b
      #
      # [-+]?[0-9,]*\.?[0-9]*
      # [-+]?[0-9]*(\.[0-9]+)?([eE][-+]?[0-9]+)?

      # ===========================================================================
      #
      # Smilies !!! ^_^
      #
      # The three strings below are character-class contents (eyes, nose,
      # mouth). Capture group 1 of RE_SMILIES is the smilie itself, without the
      # delimiting character on either side.
      #
      RE_SMILIES_EYES  = "\\:8;"
      RE_SMILIES_NOSE  = "\\-=\\*o"
      RE_SMILIES_MOUTH = "DP@Oo\\(\\)\\[\\]\\|\\{\\}\\/\\\\"
      RE_SMILIES = %r{
        (?:^|\W) # non-smilie character
        ( (?:
        >?
        [#{RE_SMILIES_EYES}] # eyes
        [#{RE_SMILIES_NOSE}]? # nose, maybe
        [#{RE_SMILIES_MOUTH}] ) # mouth
        |(?:
        [#{RE_SMILIES_MOUTH}] # mouth
        [#{RE_SMILIES_NOSE}]? # nose, maybe
        [#{RE_SMILIES_EYES}] # eyes
        <? )
        |(?: =[#{RE_SMILIES_MOUTH}]) # =) (=
        |(?: [#{RE_SMILIES_MOUTH}]=) # =) (=
        |(?: \^[_\-]\^ ) # kawaaaaiiii!
        |(?: :[,\']\( ) # snif
        |(?: <3 ) # heart
        |(?: \\m/ ) # rawk
        |(?: x-\( ) # dead
        )
        (?:\W|$)
      }x
    end
  end
end
|
179
|
+
|
180
|
+
|
181
|
+
# http://mail.google.com/support/bin/answer.py?hl=en&answer=34056
|
182
|
+
# http://en.wikipedia.org/wiki/Emoticons
|
183
|
+
#
|
184
|
+
# :-) :) =] =) Smiling, happy
|
185
|
+
# :-( =( :[ :< frowning, Sad
|
186
|
+
# ;-) ;) ;] Wink
|
187
|
+
# :D =D XD BD Large grin or laugh
|
188
|
+
# :P =P XP Tongue out, or after a joke
|
189
|
+
# <3 S2 :> Love
|
190
|
+
# :O =O Shocked or surprised
|
191
|
+
# =I :/ :-\ Bored, annoyed or awkward; concerned.
|
192
|
+
# :S =S :? Confused, embarrassed or uneasy
|
193
|
+
|
194
|
+
# Icon Meaning Icon Meaning Icon Meaning
|
195
|
+
# (^_^) smile (^o^) laughing out loud d(^_^)b thumbs up (not ears)
|
196
|
+
# (T_T) sad (crying face) (-.-)Zzz sleeping (Z.Z) sleepy person
|
197
|
+
# \(^_^)/ cheers, "Hurrah!" (*^^*) shyness (-_-); sweating (as in ashamed), or exasperated.
|
198
|
+
# (*3*) "Surprise !." (?_?) "Nonsense, I don't know." (^_~) wink
|
199
|
+
# (o.O) shocked/disturbed (<.<) shifty, suspicious v(^_^)v peace
|
200
|
+
#
|
201
|
+
# [\\dv](^_^)[bv/]
|
202
|
+
#
|