sekka 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
data/emacs/http-get.el ADDED
@@ -0,0 +1,448 @@
1
+ ;;; http-get.el --- simple HTTP GET
2
+
3
+ ;; Copyright (C) 2002, 2003 Alex Schroeder
4
+
5
+ ;; Author: Alex Schroeder <alex@gnu.org>
6
+ ;; Pierre Gaston <pierre@gaston-karlaouzou.com>
7
+ ;; David Hansen <david.hansen@physik.fu-berlin.de>
8
+ ;; Maintainer: David Hansen <david.hansen@physik.fu-berlin.de>
9
+ ;; Version: 1.0.15
10
+ ;; Keywords: hypermedia
11
+ ;; URL: http://www.emacswiki.org/cgi-bin/wiki.pl?HttpGet
12
+
13
+ ;; This file is not part of GNU Emacs.
14
+
15
+ ;; This is free software; you can redistribute it and/or modify
16
+ ;; it under the terms of the GNU General Public License as published by
17
+ ;; the Free Software Foundation; either version 2, or (at your option)
18
+ ;; any later version.
19
+
20
+ ;; This is distributed in the hope that it will be useful,
21
+ ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
+ ;; GNU General Public License for more details.
24
+
25
+ ;; You should have received a copy of the GNU General Public License
26
+ ;; along with GNU Emacs; see the file COPYING. If not, write to the
27
+ ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
28
+ ;; Boston, MA 02111-1307, USA.
29
+
30
+
31
+ ;;; Commentary:
32
+
33
+ ;; Use `http-get' to download an URL.
34
+
35
+ ;;; Change log:
36
+
37
+ ;; 1.0.15
38
+ ;; - made `http-parse-headers' RFC 2616 compatible (removing whitespaces,
39
+ ;; headers may spawn several line)
40
+ ;; - log message headers
41
+ ;; - made most variables buffer local with `make-variable-buffer-local'
42
+ ;; 1.0.14
43
+ ;; - Removed attempt to fix bug in 1.0.12, not needed anymore since 1.0.13.
44
+ ;; 1.0.13
45
+ ;; - The string is now not anymore decoded in the http-filter.
46
+ ;; You have to run `http-decode' yourself.
47
+ ;; 1.0.12
48
+ ;; - Hopefully fixed the bug with inserting "half" multi byte chars.
49
+ ;; 1.0.11
50
+ ;; - Added (setq string (string-make-unibyte string)) to http-filter
51
+ ;; this seems to solve problems with multi byte chars.
52
+ ;; - Fixed bug when building the headers.
53
+ ;; - Fixed indentation (please guys, read the coding conventions in the
54
+ ;; elisp manual)
55
+ ;; - Replaced string-bytes with length (string-bytes shouldn't be needed
56
+ ;; anymore as we force the string to be unibyte)
57
+ ;; 1.0.10
58
+ ;; - Fix some codings problems again.
59
+ ;; 1.0.9
60
+ ;; - Added better coding support.
61
+ ;; 1.0.8
62
+ ;; - Rewrote the parser.
63
+ ;; - Correction to the http 1.0 usage.
64
+ ;; 1.0.3
65
+ ;; - Move http-url-encode from http-post.el to http-get.el.
66
+ ;; - Add a param to http-get to specify the encoding of the params in the url.
67
+
68
+ ;;; Code:
69
+
70
+ (require 'hexl)
71
+ (require 'http-cookies)
72
+
73
+ (defvar http-get-version "1.0.15")
74
+
75
+ ;; Proxy
76
+ (defvar http-proxy-host nil
77
+ "*If nil dont use proxy, else name of proxy server.")
78
+
79
+ (defvar http-proxy-port nil
80
+ "*Port number of proxy server. Default is 80.")
81
+
82
+ (defvar http-coding 'iso-8859-1
83
+ "Default coding to be use when the string is inserted in the buffer.
84
+ This coding will be modified on Finding the content-type header")
85
+ (make-variable-buffer-local 'http-coding)
86
+
87
+ (defvar http-filter-pre-insert-hook '(http-parser)
88
+ "Hook run by the `http-filter'.
89
+ This is called whenever a chunk of input arrives, before it is
90
+ inserted into the buffer. If you want to modify the string that gets
91
+ inserted, modify the variable `string' which is dynamically bound to
92
+ what will get inserted in the end. The string will be inserted at
93
+ the `process-mark', which you can get by calling \(process-mark proc).
94
+ `proc' is dynamically bound to the process, and the current buffer
95
+ is the very buffer where the string will be inserted.")
96
+
97
+ (defvar http-filter-post-insert-hook nil
98
+ "Hook run by the `http-filter'.
99
+ This is called whenever a chunk of input arrives, after it has been
100
+ inserted, but before the `process-mark' has moved. Therefore, the new
101
+ text lies between the `process-mark' and point. You can get the values
102
+ of the `process-mark' by calling (process-mark proc). Please take care
103
+ to leave point at the right place, eg. by wrapping your code in a
104
+ `save-excursion'.")
105
+
106
+ (defun http-filter (proc string)
107
+ "Filter function for HTTP buffers.
108
+ See `http-filter-pre-insert-hook' and `http-filter-post-insert-hook'
109
+ for places where you can do your own stuff such as HTML rendering.
110
+ Argument PROC is the process that is filtered.
111
+ Argument STRING is the string outputted by the process."
112
+ ;; emacs seems to screw this sometimes
113
+ (when (fboundp 'string-make-unibyte)
114
+ (setq string (string-make-unibyte string)))
115
+ (with-current-buffer (process-buffer proc)
116
+ (let ((moving (= (point) (process-mark proc))))
117
+ (save-excursion
118
+ " Insert the text, advancing the process marker."
119
+ (goto-char (process-mark proc))
120
+ (run-hooks 'http-filter-pre-insert-hook)
121
+ ;; Note: the string is inserted binary in a unibyte buffer
122
+ (insert string)
123
+ (run-hooks 'http-filter-post-insert-hook)
124
+ (set-marker (process-mark proc) (point)))
125
+ (if moving (goto-char (process-mark proc))))))
126
+
127
+ (defvar http-status-code nil
128
+ "The status code returned for the current buffer.
129
+ This is set by the function `http-headers'.")
130
+ (make-variable-buffer-local 'http-status-code)
131
+
132
+ (defvar http-reason-phrase nil
133
+ "The reason phrase returned for the `http-status-code'.
134
+ This is set by the function `http-headers'.")
135
+ (make-variable-buffer-local 'http-reason-phrase)
136
+
137
+ (defvar http-headers nil
138
+ "An alist of the headers that have been parsed and removed from the buffer.
139
+ The headers are stored as an alist.
140
+ This is set by the function `http-headers'.")
141
+ (make-variable-buffer-local 'http-headers)
142
+
143
+ (defvar http-parser-state 'status-line
144
+ "Parser status.")
145
+ (make-variable-buffer-local 'http-parser-state)
146
+
147
+ (defvar http-unchunk-chunk-size 0
148
+ "Size of the current unfinished chunk.")
149
+ (make-variable-buffer-local 'http-unchunk-chunk-size)
150
+
151
+ (defvar http-not-yet-parsed ""
152
+ "Received bytes that have not yet been parsed.")
153
+ (make-variable-buffer-local 'http-not-yet-parsed)
154
+
155
+ (defvar http-host ""
156
+ "The host to which we have sent the request.")
157
+ (make-variable-buffer-local 'http-host)
158
+
159
+ (defvar http-url ""
160
+ "The requested URL.")
161
+ (make-variable-buffer-local 'http-url)
162
+
163
+ (defun http-parser ()
164
+ "Simple parser for http message.
165
+ Parse the status line, headers and chunk."
166
+ (let ((parsed-string (concat http-not-yet-parsed string)) content-type)
167
+ (setq string "")
168
+ (setq http-not-yet-parsed "")
169
+ (while (> (length parsed-string) 0)
170
+ (cond
171
+
172
+ ((eq http-parser-state 'status-line)
173
+ ;; parsing status line
174
+ (if (string-match "HTTP/[0-9.]+ \\([0-9]+\\) \\(.*\\)\r\n"
175
+ parsed-string)
176
+ (progn
177
+ (setq http-status-code
178
+ (string-to-number (match-string 1 parsed-string)))
179
+ (setq http-reason-phrase (match-string 2 parsed-string))
180
+ (setq http-parser-state 'header)
181
+ (setq parsed-string (substring parsed-string (match-end 0))))
182
+ ;; status line not found
183
+ (setq http-not-yet-parsed parsed-string)
184
+ (setq parsed-string "")))
185
+
186
+ ((eq http-parser-state 'header)
187
+ ;; parsing headers
188
+ (if (string-match "\r\n\r\n" parsed-string)
189
+ (let ((end-headers (match-end 0)))
190
+ (setq http-headers
191
+ (http-parse-headers
192
+ (substring parsed-string 0 (match-beginning 0))))
193
+ (if (string= "chunked"
194
+ (cdr (assoc "transfer-encoding" http-headers)))
195
+ (setq http-parser-state 'chunked)
196
+ (setq http-parser-state 'dump))
197
+ (when (and
198
+ (setq content-type
199
+ (cdr (assoc "content-type" http-headers)))
200
+ (string-match "charset=\\(.*\\)" content-type))
201
+ (setq http-coding
202
+ (intern-soft (downcase (match-string 1 content-type)))))
203
+ (setq parsed-string (substring parsed-string end-headers))
204
+ ;; set cookies
205
+ (when http-emacs-use-cookies
206
+ (http-cookies-set http-url http-headers)))
207
+ ;; we don't have all the headers yet
208
+ (setq http-not-yet-parsed parsed-string)
209
+ (setq parsed-string "")))
210
+
211
+ ((eq http-parser-state 'chunked)
212
+ ;; parsing chunked content
213
+ (if (> (length parsed-string) http-unchunk-chunk-size)
214
+ (progn
215
+ (setq string (concat string
216
+ (substring parsed-string 0
217
+ http-unchunk-chunk-size)))
218
+ (setq parsed-string
219
+ (substring parsed-string http-unchunk-chunk-size))
220
+ (setq http-unchunk-chunk-size 0)
221
+
222
+ (if (string-match "\\([0-9a-f]+\\)[^\r^\b]*\\(\r\n\\)"
223
+ parsed-string)
224
+ (if (> (setq http-unchunk-chunk-size
225
+ (hexl-hex-string-to-integer
226
+ (match-string 1 parsed-string)))
227
+ 0)
228
+ (setq parsed-string
229
+ (substring parsed-string (match-end 2)))
230
+ ;; chunk 0 found we just burry it
231
+ (setq parsed-string "")
232
+ (setq http-parser-state 'trailer))
233
+ ;; we don't have the next chunk-size yet
234
+ (setq http-not-yet-parsed parsed-string)
235
+ (setq parsed-string "")))
236
+ ;; the current chunk is not finished yet
237
+ (setq string (concat string parsed-string))
238
+ (setq http-unchunk-chunk-size
239
+ (- http-unchunk-chunk-size (length parsed-string)))
240
+ (setq parsed-string "")))
241
+
242
+ ((eq http-parser-state 'trailer)
243
+ ;; parsing trailer
244
+ (setq parsed-string ""))
245
+
246
+ ((eq http-parser-state 'dump)
247
+ (setq string parsed-string)
248
+ (setq parsed-string ""))))))
249
+
250
+
251
+ (defun http-parse-headers (header-string)
252
+ "Parse the header string.
253
+ Argument HEADER-STRING A string containing a header list."
254
+ ;; headers may spawn several line if the nth, n>1, line starts with
255
+ ;; at least one whitespace
256
+ (setq header-string (replace-regexp-in-string "\r\n[ \t]+" " "
257
+ header-string))
258
+ (let ((lines-list (split-string header-string "\r\n")))
259
+ (mapcar (lambda (line)
260
+ (if (string-match ":[ \t]+\\(.*?\\)[ \t]*$" line)
261
+ (cons (downcase (substring line 0 (match-beginning 0)))
262
+ (match-string 1 line))
263
+ line))
264
+ lines-list)))
265
+
266
+
267
+ ;; URL encoding for parameters
268
+ (defun http-url-encode (str content-type)
269
+ "URL encode STR using CONTENT-TYPE as the coding system."
270
+ (apply 'concat
271
+ (mapcar (lambda (c)
272
+ (if (or (and (>= c ?a) (<= c ?z))
273
+ (and (>= c ?A) (<= c ?Z))
274
+ (and (>= c ?0) (<= c ?9)))
275
+ (string c)
276
+ (format "%%%02x" c)))
277
+ (encode-coding-string str content-type))))
278
+
279
+
280
+ (defun http-decode-buffer ()
281
+ "Decode buffer according to the buffer local variable `http-coding'."
282
+ (when (and
283
+ (fboundp 'set-buffer-multibyte)
284
+ (fboundp 'multibyte-string-p))
285
+ (when (multibyte-string-p (decode-coding-string "test" http-coding))
286
+ (set-buffer-multibyte t)))
287
+ (decode-coding-region (point-min) (point-max) http-coding))
288
+
289
+ ;; Debugging
290
+ (defvar http-log-function 'ignore
291
+ "Function to call for log messages.")
292
+
293
+ (defun http-log (str)
294
+ "Log STR using `http-log-function'.
295
+ The default value just ignores STR."
296
+ (funcall http-log-function str))
297
+
298
+
299
+ (defun http-get-debug (url &optional headers version)
300
+ "Debug the call to `http-get'."
301
+ (interactive "sURL: ")
302
+ (let* ((http-log-function (lambda (str)
303
+ (save-excursion
304
+ ;; dynamic binding -- buf from http-get is used
305
+ (set-buffer buf)
306
+ (insert str))))
307
+ proc)
308
+ (when (get-buffer "*Debug HTTP-GET*")
309
+ (kill-buffer "*Debug HTTP-GET*"))
310
+ (setq proc (http-get url headers nil version))
311
+ (set (make-local-variable 'http-filter-pre-insert-hook) nil)
312
+ (set (make-local-variable 'http-filter-post-insert-hook) nil)
313
+ (rename-buffer "*Debug HTTP-GET*")))
314
+
315
+
316
+ ;; The main function
317
+
318
+ ;;;###autoload
319
+ (defun http-get (url &optional headers sentinel version bufname content-type)
320
+ "Get URL in a buffer, and return the process.
321
+ You can get the buffer associated with this process using
322
+ `process-buffer'.
323
+
324
+ The optional HEADERS are an alist where each element has the form
325
+ \(NAME . VALUE). Both must be strings and will be passed along with
326
+ the request.
327
+
328
+ With optional argument SENTINEL, the buffer is not shown. It is the
329
+ responsibility of the sentinel to show it, if appropriate. A sentinel
330
+ function takes two arguments, process and message. It is called when
331
+ the process is killed, for example. This is useful when specifying a
332
+ non-persistent connection. By default, connections are persistent.
333
+ Add \(\"Connection\" . \"close\") to HEADERS in order to specify a
334
+ non-persistent connection. Usually you do not need to specify a
335
+ sentinel, and `ignore' is used instead, to prevent a message being
336
+ printed when the connection is closed.
337
+
338
+ If you want to filter the content as it arrives, bind
339
+ `http-filter-pre-insert-hook' and `http-filter-post-insert-hook'.
340
+
341
+ The optional argument VERSION specifies the HTTP version to use. It
342
+ defaults to version 1.0, such that the connection is automatically
343
+ closed when the entire document has been downloaded. This will then
344
+ call SENTINEL, if provided. If no sentinel is provided, `ignore' will
345
+ be used in order to prevent a message in the buffer when the process
346
+ is killed.
347
+
348
+ CONTENT-TYPE is a coding system to use for the encoding of the url
349
+ param value. Its upper case print name will be used for the server.
350
+ Possible values are `iso-8859-1' or `euc-jp' and others.
351
+
352
+ The coding system of the process is set to `binary', because we need to
353
+ distinguish between \\r and \\n. To correctly decode the text later,
354
+ use `decode-coding-region' and get the coding system to use from
355
+ `http-headers'."
356
+ (interactive "sURL: ")
357
+ (setq version (or version 1.0))
358
+ (let* (host dir file port proc buf command start-line (message-headers "") )
359
+ (unless (string-match
360
+ "http://\\([^/:]+\\)\\(:\\([0-9]+\\)\\)?/\\(.*/\\)?\\([^:]*\\)"
361
+ url)
362
+ (error "Cannot parse URL %s." url))
363
+ (unless bufname
364
+ (setq bufname (format "*HTTP GET %s *" url)))
365
+
366
+ (setq host (match-string 1 url)
367
+ port (or (and (setq port (match-string 3 url))
368
+ (string-to-int port)) 80)
369
+ dir (or (match-string 4 url) "")
370
+ file (or (match-string 5 url) "")
371
+ buf (get-buffer-create bufname)
372
+ proc (open-network-stream
373
+ (concat "HTTP GET " url) buf
374
+ (if http-proxy-host http-proxy-host host)
375
+ (if http-proxy-port http-proxy-port port) ))
376
+ (if sentinel
377
+ (set-buffer buf)
378
+ (switch-to-buffer buf))
379
+ (erase-buffer)
380
+ (kill-all-local-variables)
381
+ (with-current-buffer buf
382
+ (setq http-host host)
383
+ (setq http-url url))
384
+ (if content-type
385
+ (setq file
386
+ (replace-regexp-in-string
387
+ "=[^&]+"
388
+ (lambda (param)
389
+ (concat "="
390
+ (http-url-encode (substring param 1) content-type)))
391
+ file)))
392
+ (setq start-line
393
+ (concat (format "GET %s%s%s HTTP/%.1f\r\n"
394
+ (if http-proxy-host
395
+ (concat "http://" host "/") "/") dir file version)
396
+ (format "Host: %s\r\n" host)))
397
+ (when http-emacs-use-cookies
398
+ (let ((cookie (http-cookies-build-header url)))
399
+ (when cookie (add-to-list 'headers cookie))))
400
+ (when headers
401
+ (setq message-headers (mapconcat (lambda (pair)
402
+ (concat (car pair) ": " (cdr pair)))
403
+ headers
404
+ "\r\n")))
405
+ ;; mapconcat doesn't append the \r\n for the final line
406
+ (setq command (format "%s%s\r\n\r\n" start-line message-headers))
407
+ (http-log (format "Connecting to %s %d\nCommand:\n%s\n" host port command))
408
+ (http-log message-headers)
409
+ (set-process-sentinel proc (or sentinel 'ignore))
410
+ (set-process-coding-system proc 'binary 'binary) ; we need \r\n
411
+ ;; we need this to be able to correctly decode the buffer with
412
+ ;; decode-coding-region later
413
+ (when (fboundp 'set-buffer-multibyte)
414
+ (with-current-buffer buf (set-buffer-multibyte nil)))
415
+ (set-process-filter proc 'http-filter)
416
+ (set-marker (process-mark proc) (point-max))
417
+ (process-send-string proc command)
418
+
419
+ proc))
420
+
421
+
422
+ ;; needed for xemacs. c&p from gnu emacs cvs sources
423
+ (unless (fboundp 'replace-regexp-in-string)
424
+ (defun replace-regexp-in-string (regexp rep string &optional
425
+ fixedcase literal subexp start)
426
+ (let ((l (length string))
427
+ (start (or start 0))
428
+ matches str mb me)
429
+ (save-match-data
430
+ (while (and (< start l) (string-match regexp string start))
431
+ (setq mb (match-beginning 0)
432
+ me (match-end 0))
433
+ (when (= me mb) (setq me (min l (1+ mb))))
434
+ (string-match regexp (setq str (substring string mb me)))
435
+ (setq matches
436
+ (cons (replace-match (if (stringp rep)
437
+ rep
438
+ (funcall rep (match-string 0 str)))
439
+ fixedcase literal str subexp)
440
+ (cons (substring string start mb)
441
+ matches)))
442
+ (setq start me))
443
+ (setq matches (cons (substring string start l) matches))
444
+ (apply #'concat (nreverse matches))))))
445
+
446
+ (provide 'http-get)
447
+
448
+ ;;; http-get.el ends here