synapse 0.0.1 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/client/.Rhistory ADDED
@@ -0,0 +1,294 @@
1
+ plot(accepted_denied$x, accepted_denied$y.y / (accepted_denied$y.x + accepted_denied$y.y), type="l", col="blue", xlab="Impressions", ylab="N_denied / N_accepted", main="Denied / accepted ratio")
2
+ library(ggplot2)
3
+ # Search radius (25mi is the default)
4
+ R <- 25.0
5
+ # Blended sort radius, based on bbox
6
+ BLENDED_R <- 16.39
7
+ # Distance is counted in increments of 0.1mi (old code)
8
+ MILES_INCR <- 0.1
9
+ # Take n samples
10
+ RESO_SAMPLES <- 50
11
+ # Or top n by rank
12
+ RANK_TOP_N <- 50
13
+ sf <- subset(read.table("in/sf_points_rank.2012-03-06.csv", sep="\t", header=TRUE), !duplicated(rank))
14
+ resos <- read.table("in/resos_3months_out.2012-03-07.csv", sep=",", header=TRUE)
15
+ sf_resos <- merge(sf, resos, by="hosting_id")
16
+ # Take random samples
17
+ sf_resos <- sf_resos[sample(1:nrow(sf_resos), RESO_SAMPLES, replace=FALSE),]
18
+ # Take top n
19
+ # sf_resos <- subset(sf_resos, rank > 0 & rank <= RANK_TOP_N)
20
+ ## Additonal values
21
+ sf_resos$d_R <- sf_resos$distance / R
22
+ sf_resos$o_1 <- sf_resos$nights_1month / 31
23
+ sf_resos$o_2 <- sf_resos$nights_2month / 62
24
+ sf_resos$o_3 <- sf_resos$nights_3month / 93
25
+ for (i in 1:nrow(sf_resos)) {
26
+ # Make log work. < 1 point are all treated as equally bad
27
+ if (sf_resos$points[i] < 1) {
28
+ sf_resos$log_p[i] = 0.0
29
+ }
30
+ else {
31
+ sf_resos$log_p[i] <- log10(sf_resos$points[i])
32
+ }
33
+ }
34
+ # Weights
35
+ W_p <- 0.3 # Note: points aren't normalized. The max is around 2000, so 0.3 * log10() gives just under 1.0
36
+ W_d <- -0.6
37
+ W_o <- -0.2
38
+ sf_resos$weighted_points <- W_p * sf_resos$log_p
39
+ sf_resos$weighted_distance <- W_d * sf_resos$d_R
40
+ sf_resos$weighted_occupancy <- W_o * sf_resos$o_1
41
+ sf_resos$rank2 <- sf_resos$weighted_points + sf_resos$weighted_distance + sf_resos$weighted_occupancy
42
+ # min <- min(sf_resos$rank2)
43
+ # sf_resos$rank2 <- sf_resos$rank2 - min
44
+ MAX_POINTS <- max(sf$points)
45
+ ## normalize
46
+ graph_rank <- sf_resos$rank2
47
+ d_R <- data.frame(metric=factor("d_R"), rank=factor(graph_rank), value=sf_resos$distance / R)
48
+ o_1 <- data.frame(metric=factor("o_1"), rank=factor(graph_rank), value=sf_resos$nights_1month / 31)
49
+ o_2 <- data.frame(metric=factor("o_2"), rank=factor(graph_rank), value=sf_resos$nights_2month / 62)
50
+ o_3 <- data.frame(metric=factor("o_3"), rank=factor(graph_rank), value=sf_resos$nights_3month / 93)
51
+ weighted_points <- data.frame(metric=factor("weighted_points"), rank=factor(graph_rank), value=sf_resos$weighted_points)
52
+ weighted_distance <- data.frame(metric=factor("weighted_distance"), rank=factor(graph_rank), value=sf_resos$weighted_distance)
53
+ weighted_occupancy <- data.frame(metric=factor("weighted_occupancy"), rank=factor(graph_rank), value=sf_resos$weighted_occupancy)
54
+ # TODO there should be a better way
55
+ old_points <- data.frame(metric=factor("old_points"), rank=factor(graph_rank), value=sf_resos$points)
56
+ old_blended <- data.frame(metric=factor("old_blended"), rank=factor(graph_rank), value=sf_resos$points)
57
+ old_distance <- data.frame(metric=factor("old_distance"), rank=factor(graph_rank), value=sf_resos$distance * 500)
58
+ log_p <- data.frame(metric=factor("log_p"), rank=factor(graph_rank), value=sf_resos$points)
59
+ for (i in 1:nrow(log_p)) {
60
+ # Make log work. < 1 point are all treated as equally bad
61
+ if (log_p$value[i] < 1) {
62
+ log_p$value[i] = 0.0
63
+ }
64
+ else {
65
+ log_p$value[i] <- log10(log_p$value[i])
66
+ }
67
+ # Old blended
68
+ if (sf_resos$distance[i] < BLENDED_R) {
69
+ old_blended$value[i] = sf_resos$points[i]
70
+ }
71
+ else {
72
+ old_blended$value[i] = log10(sf_resos$points[i] / sf_resos$distance[i])
73
+ }
74
+ }
75
+ rank_data <- rbind(d_R, log_p, o_1, o_2, o_3)
76
+ ## end normalize
77
+ old_rank_data <- rbind(old_blended, old_distance)
78
+ # ggplot(data=old_rank_data, aes(x=rank, y=value, fill=metric)) + geom_bar(position=position_dodge())
79
+ new_rank_data <- rbind(weighted_points, weighted_distance, weighted_occupancy)
80
+ filename <- sprintf("out/weighted_blend_components_W_p=%f_W_d=%f_W_o=%f.pdf", W_p, W_d, W_o)
81
+ pdf(filename)
82
+ ggplot(data=new_rank_data, aes(x=rank, y=value, fill=metric)) + geom_bar(position=position_dodge())
83
+ # ggplot(data=rank_data, aes(x=rank, y=value, fill=metric)) + geom_bar()
84
+ library(ggplot2)
85
+ # Search radius (25mi is the default)
86
+ R <- 25.0
87
+ # Blended sort radius, based on bbox
88
+ BLENDED_R <- 16.39
89
+ # Distance is counted in increments of 0.1mi (old code)
90
+ MILES_INCR <- 0.1
91
+ # Take n samples
92
+ RESO_SAMPLES <- 50
93
+ # Or top n by rank
94
+ RANK_TOP_N <- 50
95
+ sf <- subset(read.table("in/sf_points_rank.2012-03-06.csv", sep="\t", header=TRUE), !duplicated(rank))
96
+ resos <- read.table("in/resos_3months_out.2012-03-07.csv", sep=",", header=TRUE)
97
+ sf_resos <- merge(sf, resos, by="hosting_id")
98
+ # Take random samples
99
+ sf_resos <- sf_resos[sample(1:nrow(sf_resos), RESO_SAMPLES, replace=FALSE),]
100
+ # Take top n
101
+ # sf_resos <- subset(sf_resos, rank > 0 & rank <= RANK_TOP_N)
102
+ ## Additonal values
103
+ sf_resos$d_R <- sf_resos$distance / R
104
+ sf_resos$o_1 <- sf_resos$nights_1month / 31
105
+ sf_resos$o_2 <- sf_resos$nights_2month / 62
106
+ sf_resos$o_3 <- sf_resos$nights_3month / 93
107
+ for (i in 1:nrow(sf_resos)) {
108
+ # Make log work. < 1 point are all treated as equally bad
109
+ if (sf_resos$points[i] < 1) {
110
+ sf_resos$log_p[i] = 0.0
111
+ }
112
+ else {
113
+ sf_resos$log_p[i] <- log10(sf_resos$points[i])
114
+ }
115
+ }
116
+ # Weights
117
+ W_p <- 0.3 # Note: points aren't normalized. The max is around 2000, so 0.3 * log10() gives just under 1.0
118
+ W_d <- -0.6
119
+ W_o <- -0.2
120
+ sf_resos$weighted_points <- W_p * sf_resos$log_p
121
+ sf_resos$weighted_distance <- W_d * sf_resos$d_R
122
+ sf_resos$weighted_occupancy <- W_o * sf_resos$o_1
123
+ sf_resos$rank2 <- sf_resos$weighted_points + sf_resos$weighted_distance + sf_resos$weighted_occupancy
124
+ # min <- min(sf_resos$rank2)
125
+ # sf_resos$rank2 <- sf_resos$rank2 - min
126
+ MAX_POINTS <- max(sf$points)
127
+ ## normalize
128
+ graph_rank <- sf_resos$rank2
129
+ d_R <- data.frame(metric=factor("d_R"), rank=factor(graph_rank), value=sf_resos$distance / R)
130
+ o_1 <- data.frame(metric=factor("o_1"), rank=factor(graph_rank), value=sf_resos$nights_1month / 31)
131
+ o_2 <- data.frame(metric=factor("o_2"), rank=factor(graph_rank), value=sf_resos$nights_2month / 62)
132
+ o_3 <- data.frame(metric=factor("o_3"), rank=factor(graph_rank), value=sf_resos$nights_3month / 93)
133
+ weighted_points <- data.frame(metric=factor("weighted_points"), rank=factor(graph_rank), value=sf_resos$weighted_points)
134
+ weighted_distance <- data.frame(metric=factor("weighted_distance"), rank=factor(graph_rank), value=sf_resos$weighted_distance)
135
+ weighted_occupancy <- data.frame(metric=factor("weighted_occupancy"), rank=factor(graph_rank), value=sf_resos$weighted_occupancy)
136
+ # TODO there should be a better way
137
+ old_points <- data.frame(metric=factor("old_points"), rank=factor(graph_rank), value=sf_resos$points)
138
+ old_blended <- data.frame(metric=factor("old_blended"), rank=factor(graph_rank), value=sf_resos$points)
139
+ old_distance <- data.frame(metric=factor("old_distance"), rank=factor(graph_rank), value=sf_resos$distance * 500)
140
+ log_p <- data.frame(metric=factor("log_p"), rank=factor(graph_rank), value=sf_resos$points)
141
+ for (i in 1:nrow(log_p)) {
142
+ # Make log work. < 1 point are all treated as equally bad
143
+ if (log_p$value[i] < 1) {
144
+ log_p$value[i] = 0.0
145
+ }
146
+ else {
147
+ log_p$value[i] <- log10(log_p$value[i])
148
+ }
149
+ # Old blended
150
+ if (sf_resos$distance[i] < BLENDED_R) {
151
+ old_blended$value[i] = sf_resos$points[i]
152
+ }
153
+ else {
154
+ old_blended$value[i] = log10(sf_resos$points[i] / sf_resos$distance[i])
155
+ }
156
+ }
157
+ rank_data <- rbind(d_R, log_p, o_1, o_2, o_3)
158
+ ## end normalize
159
+ old_rank_data <- rbind(old_blended, old_distance)
160
+ # ggplot(data=old_rank_data, aes(x=rank, y=value, fill=metric)) + geom_bar(position=position_dodge())
161
+ new_rank_data <- rbind(weighted_points, weighted_distance, weighted_occupancy)
162
+ filename <- sprintf("out/weighted_blend_components_W_p=%f_W_d=%f_W_o=%f.pdf", W_p, W_d, W_o)
163
+ pdf(filename)
164
+ ggplot(data=new_rank_data, aes(x=rank, y=value, fill=metric)) + geom_bar(position=position_dodge())
165
+ # ggplot(data=rank_data, aes(x=rank, y=value, fill=metric)) + geom_bar()
166
+ install.packages('e1071')
167
+ library("e1071")
168
+ ?svm
169
+ data <- read.table("/Users/tobi/Projects/auto_pricing/data.csv")
170
+ data <- read.table("/Users/tobi/Projects/auto_pricing/data.csv", sep=",")
171
+ View(data)
172
+ m <- svm(data$V7, data$V1)
173
+ data <- read.table("/Users/tobi/Projects/auto_pricing/data.csv", sep=",", header=TRUE)
174
+ View(data)
175
+ m <- svm(data$country_state_city_neighborhood, data$price)
176
+ m <- svm(as.factor(data$country_state_city_neighborhood), data$price)
177
+ data_clean <- subset(data, !is.na(country_state_city_neighborhood))
178
+ m <- svm(as.factor(data$square_feet, data$price)
179
+ )
180
+ m <- svm(data$square_feet, data$price)
181
+ x <- seq(0.1, 5, by = 0.05)
182
+ y <- log(x) + rnorm(x, sd = 0.2)
183
+ fix(x)
184
+ m <- svm(x, y)
185
+ new <- predict(m, x)
186
+ plot(x, y)
187
+ points(x, log(x), col = 2)
188
+ points(x, new, col = 4)
189
+ X <- data.frame(a = rnorm(1000), b = rnorm(1000))
190
+ attach(X)
191
+ m <- svm(X)
192
+ View(X)
193
+ m <- svm(~a + b)
194
+ predict(m, t(c(0, 0)))
195
+ data <- read.table("/Users/tobi/Desktop/data.csv")
196
+ data <- read.table("/Users/tobi/Desktop/data.csv", sep="\t")
197
+ View(data)
198
+ data <- read.table("/Users/tobi/Desktop/data.csv", sep=",")
199
+ View(data)
200
+ data <- read.table("/Users/tobi/Desktop/data.csv", sep=",", header=TRUE)
201
+ View(data)
202
+ data$0
203
+ data$1
204
+ data[1]
205
+ data[0]
206
+ data[2]
207
+ plot(data[1], data[2])
208
+ plot(data[2])
209
+ plot(data[1], data[2])
210
+ plot(data[1], data[3])
211
+ library("ggplot2")
212
+ qplot
213
+ qplot(data[1], data[2])
214
+ qplot(x=data[1], y=data[2])
215
+ data <- read.table("/Users/tobi/Desktop/data.csv", sep=",", header=TRUE)
216
+ plot(data)
217
+ data <- read.table("/Users/tobi/Desktop/data.csv", sep=",", header=TRUE)
218
+ library("ggplot2")
219
+ qplot(date, fees, data=data)
220
+ qplot(date, fees, data=data, geom=c("point", "smooth"))
221
+ qplot(date, fees, data=data, geom=c("point", "smooth"), group=1)
222
+ data2011 <- read.table("/Users/tobi/Desktop/data_2011.csv", sep=",", header=TRUE)
223
+ View(data2011)
224
+ qplot(date, fees, data=data2011, geom=c("point", "smooth"), group=1)
225
+ qplot(date, fees, data=data, geom=c("point", "smooth"), group=1)
226
+ qplot(date, fees, data=data2011, geom=c("point", "smooth"), group=1)
227
+ qplot(date, fees, data=data, geom=c("point", "smooth"), group=1)
228
+ data_all <- read.table("/Users/tobi/Desktop/data_all.csv", sep=",", header=TRUE)
229
+ qplot(date, fees, data=data_all, geom=c("point", "smooth"), group=1)
230
+ data_all <- read.table("/Users/tobi/Desktop/data_2011_present.csv", sep=",", header=TRUE)
231
+ qplot(date, fees, data=data_all, geom=c("point", "smooth"), group=1)
232
+ View(data_all)
233
+ data_all <- read.table("/Users/tobi/Desktop/data_all.csv", sep=",", header=TRUE)
234
+ qplot(date, fees, data=data_all, geom=c("point", "smooth"), group=1)
235
+ library("ggplot2")
236
+ data <- read.table("~/all_posts.csv")
237
+ d <- read.tabl('statsd_data.txt')
238
+ d <- read.table('statsd_data.txt')
239
+ View(d)
240
+ r <- signif(d, 2)
241
+ View(r)
242
+ f <- fft(r)
243
+ f <- fft(r$V1)
244
+ fix(f)
245
+ plot(f)
246
+ plot(r)
247
+ describe(r)
248
+ describe(r)
249
+ summary(r)
250
+ stddev(r)
251
+ std(r)
252
+ hist(r)
253
+ hist(r$V1)
254
+ hist(d$V1)
255
+ hist(d$V1, breaks=10)
256
+ hist(d$V1, breaks=30)
257
+ hist(d$V1, breaks=20)
258
+ hist(d$V1, breaks=50)
259
+ hist(d$V1, breaks=60)
260
+ hist(d$V1, breaks=100)
261
+ hist(d$V1, breaks=50)
262
+ summary(d$V1)
263
+ setwd("~/Projects/service_router/client")
264
+ data <- read.table("bench_rewrite_config.dat")
265
+ View(data)
266
+ library("ggplot2")
267
+ qplot(data)
268
+ qplot(data, V3, geom="line")
269
+ qplot(V2, V3, data=data, geom="line")
270
+ qplot(V2, V3, data=data, geom="line", facets=V1)
271
+ qplot(V2, V3, data=data, geom="line", facets=data$V1)
272
+ qplot(V2, V3, data=data, geom="line", facets=as.factor(V1))
273
+ qplot(V2, V3, data=data, geom="line", facets=as.factor(data$V1))
274
+ qplot(V2, V3, data=data, geom="line", facets=as.factor(V1) ~ .)
275
+ qplot(V2, V3, data=data, geom="line", facets=V1 ~ .)
276
+ qplot(V2, V3, data=data, geom="line", colour=V1 ~ .)
277
+ qplot(V2, V3, data=data, geom="line", colour=V1)
278
+ qplot(V2, V3, data=data, geom="line", colour=V1, ylim=c(0, 9999))
279
+ qplot(V2, V3, data=data, geom="line", colour=V1, ylim=c(999, 9999))
280
+ qplot(V2, V3, data=data, geom="line", colour=V1, ylim=c(1000, 6000))
281
+ qplot(V2, V3, data=data, geom="line", colour=V1, ylim=c(1000, 5000))
282
+ qplot(V2, V3, data=data, geom="line", colour=as.factor(V1), ylim=c(1000, 5000))
283
+ qplot(V2, V3, data=data, colour=as.factor(V1), ylim=c(1000, 5000))
284
+ data <- read.table("bench_rewrite_config.dat")
285
+ qplot(V2, V3, data=data, colour=as.factor(V1), ylim=c(1000, 5000))
286
+ qplot(V2, V3, data=data, colour=as.factor(V1))
287
+ qplot(V2, V3, data=data[10:], colour=as.factor(V1))
288
+ qplot(V2, V3, data=data[10], colour=as.factor(V1))
289
+ qplot(V2, V3, data=data[10], colour=as.factor(V1))
290
+ qplot(V2, V3, data=data, colour=as.factor(V1), ylim=c(1000, 25000))
291
+ qplot(V2, V3, data=data, colour=as.factor(V1), ylim=c(1000, 20000))
292
+ data <- read.table("bench_rewrite_config.dat")
293
+ qplot(V2, V3, data=data, colour=as.factor(V1), ylim=c(1000, 20000))
294
+ qplot(V2, V3, data=data, colour=as.factor(V1), ylim=c(1000, 20000))