setlr 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,669 @@
1
+ """An experimental XPath-based streaming filter for ElementTree's iterparse
2
+
3
+ For details see:
4
+ http://dalkescientific.com/writings/diary/archive/2006/11/06/iterparse_filter.html
5
+ """
6
+ from __future__ import print_function
7
+ # I have got to rearrange my site to use shorter URLs.
8
+
9
+ from future import standard_library
10
+ standard_library.install_aliases()
11
+ from builtins import zip
12
+ from builtins import object
13
+ __version__ = "0.9-experimental"
14
+
15
+ import re
16
+
17
+ dtd_validation = False
18
+ try:
19
+ from lxml import etree
20
+ dtd_validation = True
21
+ except ImportError:
22
+ try:
23
+ # Python 2.5
24
+ import xml.etree.cElementTree as etree
25
+ except ImportError:
26
+ try:
27
+ # Python 2.5
28
+ import xml.etree.ElementTree as etree
29
+ except ImportError:
30
+ try:
31
+ # normal cElementTree install
32
+ import cElementTree as etree
33
+ except ImportError:
34
+ # normal ElementTree install
35
+ import elementtree.ElementTree as etree
36
+
37
+ # define "letter" as "any character except /:[]()@={}* or in \s"
38
+ # (XXX make it match the XML spec)
39
+ # A URI is:
40
+ # letter+
41
+ # letter+ ':' letter+ --- a namespace prefixed term, like xml:space
42
+ # '{' [^}]* '}' letter+ --- a Clark namespace term, like {http://a}b
43
+ # Can also use a '*' in place of a URI or in the tag part of a namespaced field
44
+ #
45
+ # URIs are separated only by '/' and '//'.
46
+ # These may not occur together, eg, '///' is not allowed.
47
+
48
+ # Basing this tokenization method in part on elementtree.ElementPath
49
# Compiled once at import time.  The alternatives are tried in order, so the
# final catch-all group only captures characters that no earlier alternative
# accepts.  Note that "." is compiled without re.S, so a newline in the input
# is matched by none of the alternatives and is skipped rather than reported.
_xpath_pattern = re.compile(r"""
(// | / )         # separators

| (?:                        # namespaced term
    ([^\/\:\[\]\(\)\@\=\{\}\*\s]+) :     # namespace
    ([^\/\:\[\]\(\)\@\=\{\}\*\s]+|\*)  # tag
  )

| (?:
   \{([^}]*)\}                   # namespace in Clark notation
   ([^\/\:\[\]\(\)\@\=\{\}\*\s]+|\*)  # tag
  )

| ([^\/\:\[\]\(\)\@\=\{\}\*\s]+|\*)    # tag with no namespace

| (.)            # everything else; used to identify errors
""", re.X)
# """"" # fix emacs cruft; having too many special characters fools it

# Kept under its historical name for any caller that uses the raw findall.
xpath_tokenizer = _xpath_pattern.findall


def tokenize(s):
    """Split the xpath string *s* into ``(op, args, pos)`` tokens.

    Yields, in input order:

    * ``("/", None, pos)`` or ``("//", None, pos)`` for separators,
    * ``("namespace", (prefix, tag), pos)`` for ``prefix:tag`` terms,
    * ``("clark", (namespace, tag), pos)`` for ``{namespace}tag`` terms
      (the namespace may be the empty string),
    * ``("default", tag, pos)`` for a bare tag or ``*``.

    ``pos`` is the 0-based offset in *s* where the token starts.

    Raises:
        SyntaxError: when a character is not part of any valid token.
    """
    for match in _xpath_pattern.finditer(s):
        # Use the real match offset; error messages rely on it.
        pos = match.start()
        (separator, prefix, prefixed_tag,
         clark_ns, clark_tag, plain_tag, other) = match.groups("")
        if separator:
            yield (separator, None, pos)
        elif prefix:
            yield ("namespace", (prefix, prefixed_tag), pos)
        elif clark_tag:
            # Test the tag, not the namespace: "{}A" has an empty namespace
            # but is still a well-formed Clark term.
            yield ("clark", (clark_ns, clark_tag), pos)
        elif plain_tag:
            yield ("default", plain_tag, pos)
        elif other:
            raise SyntaxError("Unknown symbol %r at position %d" %
                              (other, pos))
        else:
            raise AssertionError("Unknown token: %r" % (match.groups(""),))
86
+
87
+ def _make_original_tag(op, args):
88
+ if op == "namespace":
89
+ return "%s:%s" % (args[0], args[1])
90
+ if op == "clark":
91
+ return "{%s}:%s" % (args[0], args[1])
92
+ if op == "default":
93
+ return args
94
+ raise AssertionError("Strange: %r %r" % (op, args))
95
+
96
+ def _verify_ordering(tokens):
97
+ if not tokens:
98
+ raise SyntaxError(
99
+ "empty xpath not supported (don't know how to handle that case)")
100
+ pos = 0
101
+ prev = None
102
+ SEP = 1
103
+ URI = 2
104
+ # Check that the path alternates between separator and uri
105
+ for op, args, pos in tokens:
106
+ if op in ("/", "//"):
107
+ if prev == SEP:
108
+ raise SyntaxError(
109
+ "separator %r may not follow separator at position %d" %
110
+ (op, pos))
111
+ prev = SEP
112
+ elif op in ("namespace", "clark", "default"):
113
+ if prev == URI:
114
+ errmsg = _make_original_tag(op, args)
115
+ raise SyntaxError(
116
+ "%r may not follow a separator at position %d" %
117
+ (errormsg, pos))
118
+ prev = URI
119
+ else:
120
+ raise AssertionError("Unknown op: %r, %r, %r" % (op, args, pos))
121
+
122
+ if tokens[-1][0] == "//":
123
+ raise AssertionError("xpath may not end with '//'")
124
+
125
+ # There are further optimizations. For example, if this
126
+ # returned a match function instead of the regex then it
127
+ # could special case terms like /blah//* to mean "startswith('/blah/')"
128
+ # The small performance advantages for most cases doesn't
129
+ # currently warrant the extra work.
130
def to_regexp(s, namespaces=None, default_namespace=None):
    """Translate the restricted xpath *s* into a regular-expression string.

    The returned pattern is meant to be searched against an element path
    written as ``"/" + "/".join(tags) + "/"`` where each tag is in Clark
    notation.  A leading ``/`` in *s* anchors the pattern at the start of
    the path; the pattern is always anchored at the end.

    Args:
        s: the xpath expression.
        namespaces: mapping from namespace prefix to full namespace URI,
            used to resolve ``prefix:tag`` terms.  Defaults to no prefixes.
        default_namespace: namespace applied to terms written without one.
            ``None`` means "no namespace"; the empty string is a distinct,
            explicit namespace and produces ``{}tag``.

    Returns:
        The regular expression as a string (not compiled).

    Raises:
        SyntaxError: for malformed paths or an unknown namespace prefix.
        AssertionError: for internal inconsistencies reported by the
            tokenizer or the ordering check.
    """
    if namespaces is None:
        namespaces = {}

    tokens = list(tokenize(s))
    _verify_ordering(tokens)

    ### Process the tokens
    re_terms = []
    if tokens[0][0] == "/":
        # Absolute path: anchor at the start and drop the leading separator.
        re_terms.append("^")
        tokens.pop(0)

    for op, args, pos in tokens:
        if op == "/":
            # Each term emits its own leading "/", so nothing to add here.
            pass
        elif op == "//":
            # Zero or more intermediate path components.
            re_terms.append("(/[^/]+)*")
        elif op in ("namespace", "clark", "default"):
            # Break each apart to get the correct namespace and tag
            if op == "namespace":
                namespace, tag = args
                try:
                    full_namespace = namespaces[namespace]
                except KeyError:
                    raise SyntaxError("Unknown namespace %r at position %d" %
                                      (namespace, pos))
            elif op == "clark":
                full_namespace, tag = args
            elif op == "default":
                full_namespace = default_namespace
                tag = args

            # Figure out which pattern to use for the combination
            # of (namespace, namespace==None) x (tag, tag=='*')
            if full_namespace is None:
                # No namespace specified
                if tag == "*":
                    # Select everything between the /s
                    re_terms.append("/[^/]+")
                else:
                    # Select exactly the tag, no namespace
                    re_terms.append("/%s" % (re.escape(tag),))
            else:
                # namespace specified
                if tag == "*":
                    # Select only fields in the given namespace
                    re_terms.append("/" +
                                    re.escape("{%s}" % (full_namespace,)) +
                                    "[^/]+")
                else:
                    # Must match namespace and tag, exactly
                    re_terms.append("/" +
                                    re.escape("{%s}%s" % (full_namespace, tag)))
        else:
            raise AssertionError("Unknown op %r" % (op,))

    # Must be a complete match
    re_terms.append("/$")

    return "".join(re_terms)
188
+
189
class IterParseFilter(object):
    """Collects xpath-keyed handlers and iteration requests for a parse.

    Handlers are callables invoked as ``handler(event, ele, state)``.
    Registration methods only record what was asked for; each call to
    ``parse``, ``iterparse`` or ``handler_parse`` builds a fresh
    ``FilterAutomata`` from the current registrations and runs it.

    Args:
        namespaces: mapping from prefix to namespace URI used when
            translating registered paths.  Defaults to an empty mapping.
        default_namespace: namespace applied to path terms written without
            one (``None`` means no namespace).
        validate_dtd: forwarded to the automata's parse call.
    """

    def __init__(self, namespaces=None, default_namespace=None, validate_dtd=False):
        if namespaces is None:
            namespaces = {}
        self.namespaces = namespaces
        self.default_namespace = default_namespace
        self.validate_dtd = validate_dtd

        self._start_document_handlers = []
        self._end_document_handlers = []

        # Entries are (path, matcher, handler) tuples.
        self._start_filters = []
        self._end_filters = []
        self._default_start_filters = []
        self._default_end_filters = []
        # Entries are (path, matcher) tuples.
        self._iter_start_filters = []
        self._iter_end_filters = []

        self._start_ns_handlers = []
        self._end_ns_handlers = []
        self._iter_start_ns = False
        self._iter_end_ns = False

    def on_start_document(self, handler):
        """Register *handler* for the synthesized start-document event."""
        self._start_document_handlers.append(handler)

    def on_end_document(self, handler):
        """Register *handler* for the synthesized end-document event."""
        self._end_document_handlers.append(handler)

    def _compile_path(self, path):
        """Return the ``search`` method of the regex compiled for *path*."""
        path_re = to_regexp(path,
                            namespaces=self.namespaces,
                            default_namespace=self.default_namespace)
        return re.compile(path_re).search

    def _add_handler(self, filters, path, handler):
        """Append a ``(path, matcher, handler)`` entry to *filters*."""
        filters.append((path, self._compile_path(path), handler))

    def on_start(self, path, handler):
        """Call *handler* on "start" events for elements matching *path*."""
        self._add_handler(self._start_filters, path, handler)

    def on_end(self, path, handler):
        """Call *handler* on "end" events for elements matching *path*."""
        self._add_handler(self._end_filters, path, handler)

    def on_start_default(self, path, handler):
        """Like ``on_start`` but used only when no ``on_start`` path matches."""
        self._add_handler(self._default_start_filters, path, handler)

    def on_end_default(self, path, handler):
        """Like ``on_end`` but used only when no ``on_end`` path matches."""
        self._add_handler(self._default_end_filters, path, handler)

    def _add_yielder(self, yielders, path):
        """Append a ``(path, matcher)`` entry to *yielders*."""
        yielders.append((path, self._compile_path(path)))

    def iter_start(self, path):
        """Yield "start" events for elements matching *path*."""
        self._add_yielder(self._iter_start_filters, path)

    def iter_end(self, path):
        """Yield "end" events for elements matching *path*."""
        self._add_yielder(self._iter_end_filters, path)

    def on_start_ns(self, handler):
        """Register *handler* for "start-ns" events."""
        self._start_ns_handlers.append(handler)

    def on_end_ns(self, handler):
        """Register *handler* for "end-ns" events."""
        self._end_ns_handlers.append(handler)

    def iter_start_ns(self):
        """Request that "start-ns" events be yielded while iterating."""
        self._iter_start_ns = True

    def iter_end_ns(self):
        """Request that "end-ns" events be yielded while iterating."""
        self._iter_end_ns = True

    def _get_filter_info(self, category):
        # NOTE(review): this reads ``self.filters``, which __init__ never
        # sets, and unpacks 4-tuples although registered entries have three
        # fields.  Calling it raises AttributeError; it looks like a leftover
        # from an earlier design.  Left untouched -- confirm before removing.
        for (_, _, pat, handler) in self.filters[category]:
            yield (pat, handler)

    def create_fa(self):
        """Build a ``FilterAutomata`` from the current registrations.

        Every list is copied so that later registrations cannot affect a
        parse that is already running.  The "end" side lists are reversed.
        """
        return FilterAutomata(
            start_document_handlers = self._start_document_handlers[:],
            end_document_handlers = self._end_document_handlers[::-1], # reverse!
            start_filters = self._start_filters[:],
            end_filters = self._end_filters[::-1],  # reversing here!
            default_start_filters = self._default_start_filters[:],
            default_end_filters = self._default_end_filters[::-1], # reversing!
            iter_start_filters = self._iter_start_filters[:],
            iter_end_filters = self._iter_end_filters[:],

            start_ns_handlers = self._start_ns_handlers[:],
            end_ns_handlers = self._end_ns_handlers[::-1], # reversing here!
            iter_start_ns = self._iter_start_ns,
            iter_end_ns = self._iter_end_ns)

    # These forward to the underlying automata; make a new one each time.
    def parse(self, file, state=None):
        """Run handlers and return an iterator over the requested events."""
        return self.create_fa().parse(file, state, self.validate_dtd)

    # Experimental
    def iterparse(self, file):
        """Return an iterator over the requested events (no handler state)."""
        return self.create_fa().iterparse(file, self.validate_dtd)

    # I need a better name
    def handler_parse(self, file, state=None):
        """Parse *file* to completion, running handlers and discarding yields.

        Honors ``validate_dtd`` the same way ``parse`` and ``iterparse`` do.
        """
        for _ in self.create_fa().parse(file, state, self.validate_dtd):
            pass
283
+
284
+
285
class FilterAutomata(object):
    """Runs one configuration of handlers and yield requests over a document.

    The constructor stores its arguments as given.  Element filter entries
    are ``(path, matcher, handler)`` tuples and iteration filter entries are
    ``(path, matcher)`` tuples, where ``matcher`` is called with the current
    element path (``"/tag1/tag2/"``) and returns a true value on a match.
    Handlers are invoked as ``handler(event, ele, state)``.
    """

    def __init__(self,
                 start_document_handlers,
                 end_document_handlers,

                 start_filters,
                 end_filters,
                 default_start_filters,
                 default_end_filters,
                 iter_start_filters,
                 iter_end_filters,

                 start_ns_handlers,
                 end_ns_handlers,
                 iter_start_ns,
                 iter_end_ns):
        self.start_document_handlers = start_document_handlers
        self.end_document_handlers = end_document_handlers

        self.start_filters = start_filters
        self.end_filters = end_filters
        self.default_start_filters = default_start_filters
        self.default_end_filters = default_end_filters
        self.iter_start_filters = iter_start_filters
        self.iter_end_filters = iter_end_filters

        self.start_ns_handlers = start_ns_handlers
        self.end_ns_handlers = end_ns_handlers
        self.iter_start_ns = iter_start_ns
        self.iter_end_ns = iter_end_ns

        # Child table of the root node.  parse() installs it in the root
        # node tuple but does not add entries to it.
        # NOTE: not thread-safe.
        self.dfa = {}

    def _new_node(self, stack_as_path):
        """Work out what applies to the element at *stack_as_path*.

        Returns a tuple ``(children, start_handlers, end_handlers,
        iter_start, iter_end)``.  Default handlers are used only when no
        regular handler of the same kind matches.
        """
        start_handlers = [handler
                          for (path, matcher, handler) in self.start_filters
                          if matcher(stack_as_path)]
        if not start_handlers:
            # Any defaults?
            start_handlers = [
                handler
                for (path, matcher, handler) in self.default_start_filters
                if matcher(stack_as_path)]

        end_handlers = [handler
                        for (path, matcher, handler) in self.end_filters
                        if matcher(stack_as_path)]
        if not end_handlers:
            # Any defaults?
            end_handlers = [
                handler
                for (path, matcher, handler) in self.default_end_filters
                if matcher(stack_as_path)]

        # Have all the handlers, now check for yields
        iter_start = False
        for (path, matcher) in self.iter_start_filters:
            if matcher(stack_as_path):
                iter_start = True
                break

        iter_end = False
        for (path, matcher) in self.iter_end_filters:
            if matcher(stack_as_path):
                iter_end = True
                break

        return ({}, start_handlers, end_handlers, iter_start, iter_end)

    def _needed_actions(self, iter=False, handler=False):
        """Return the tuple of iterparse event names this run needs.

        "start" and "end" are always included.  "start-ns" / "end-ns" are
        added when the corresponding handlers exist (and *handler* is true)
        or the corresponding iteration flag is set (and *iter* is true).

        Raises:
            AssertionError: if neither *iter* nor *handler* is true.
        """
        if (not handler) and (not iter):
            raise AssertionError("must specify one")
        actions = ("start", "end")
        if ( (handler and self.start_ns_handlers) or
             (iter and self.iter_start_ns) ):
            actions = actions + ("start-ns",)

        if ( (handler and self.end_ns_handlers) or
             (iter and self.iter_end_ns) ):
            actions = actions + ("end-ns",)
        return actions

    # I plan to implement 'handler_parse' as a near copy of 'parse'
    # but without any yield statements.
    def handler_parse(self, file, state=None, validate_dtd=False):
        """Parse *file* to completion, running handlers; yields are dropped."""
        for _ in self.parse(file, state, validate_dtd):
            pass

    # I plan to implement 'iterparse' as a near copy of 'parse'
    # but without any references to callbacks
    def iterparse(self, file, validate_dtd=False):
        """Return the event iterator for *file* with ``state`` set to None."""
        return self.parse(file, None, validate_dtd)

    def parse(self, file, state=None, validate_dtd=False):
        """Generator: parse *file*, call handlers, yield requested events.

        Args:
            file: passed to ``etree.iterparse`` as the source.
            state: opaque object handed to every handler as third argument.
            validate_dtd: request DTD validation; ignored unless the
                module-level ``dtd_validation`` flag is true.

        Yields:
            ``(event, ele)`` pairs for elements matching an iteration
            filter and, when requested, for namespace events.
        """
        if not dtd_validation:
            validate_dtd = False
        node_stack = []
        tag_stack = []
        # children, start handlers, end handlers, iter start, iter end
        node = (self.dfa, [], [], False, False)

        # synthesize start-document events
        for handler in self.start_document_handlers:
            handler("start-document", None, state)

        # figure out if I also need start-ns and/or end-ns events
        needed_actions = self._needed_actions(True, True)
        kwargs = {}
        if validate_dtd:
            kwargs = dict(dtd_validation=True)

        for (event, ele) in etree.iterparse(file, needed_actions, **kwargs):
            if event == "start":
                # Descend into node; track where I am
                tag_stack.append(ele.tag)
                node_stack.append(node)
                stack_as_path = "/" + ("/".join(tag_stack)) + "/"
                node = self._new_node(stack_as_path)

                # call the start handlers then yield the element
                for start_handler in node[1]:
                    start_handler(event, ele, state)
                if node[3]:
                    yield (event, ele)

            elif event == "end":
                # call the end handlers then yield the element
                for end_handler in node[2]:
                    end_handler(event, ele, state)
                del tag_stack[-1]
                if node[4]:
                    yield (event, ele)
                    # Release only elements that were handed to the
                    # consumer: clearing every element would strip children
                    # that an ancestor's end handler may still read.
                    ele.clear()
                    # getparent() exists on lxml elements only; the module
                    # can also run on the standard ElementTree.
                    getparent = getattr(ele, "getparent", None)
                    parent = getparent() if getparent is not None else None
                    if parent is not None:
                        parent.remove(ele)
                node = node_stack.pop()

            elif event == "start-ns":
                for handler in self.start_ns_handlers:
                    handler(event, ele, state)
                if self.iter_start_ns:
                    yield (event, ele)

            elif event == "end-ns":
                for handler in self.end_ns_handlers:
                    handler(event, ele, state)
                # The payload of an end-ns event is None, so there is
                # nothing to clear or detach here.
                if self.iter_end_ns:
                    yield (event, ele)

        for handler in self.end_document_handlers:
            handler("end-document", None, state)
465
+
466
+
467
+ #### An incomplete test suite ####
468
+
469
def test_path(path, args):
    """Return True when xpath *path* matches the element path built from *args*.

    *args* is the list of tags from the root down; no namespace prefixes
    and no default namespace are configured.
    """
    matcher = re.compile(to_regexp(path))
    element_path = "/%s/" % ("/".join(args),)
    return matcher.search(element_path) is not None
476
+
477
def test_ns_path(path, args):
    """Like ``test_path`` but with namespace prefixes and a default namespace.

    The prefixes ``xml`` and ``das2`` are defined, and the default
    namespace is the empty string.
    """
    prefixes = {
        "xml": "http://www.w3.org/XML/1998/namespace",
        "das2": "http://biodas.org/documents/das2"}
    # the empty namespace is not the same as no namespace!
    pattern = to_regexp(path, namespaces=prefixes, default_namespace="")
    matcher = re.compile(pattern)
    element_path = "/%s/" % ("/".join(args),)
    return matcher.search(element_path) is not None
490
+
491
def test_syntax():
    """Check xpath-to-regex translation against tables of known answers.

    Each case is ``(xpath, tag_list, expect)`` where *expect* is 1 when the
    xpath should match the element path built from *tag_list* and 0 when
    it should not.  Raises AssertionError on the first mismatch.
    """
    def run_cases(matches, cases):
        # Apply the matcher to every case and stop at the first surprise.
        for xpath, tag_list, expect in cases:
            got = matches(xpath, tag_list)
            if got != expect:
                raise AssertionError("xpath %r against %r got %r, expected %r" %
                                     (xpath, tag_list, got, bool(expect)))

    plain_cases = (
        ("A", ["A"], 1),
        ("A", ["AA"], 0),
        ("A", ["B", "A"], 1),
        ("/A", ["B", "A"], 0),
        ("/B", ["B", "A"], 0),
        ("//A", ["B", "A"], 1),
        ("A//B", ["A", "B"], 1),
        ("A//B", ["C", "A", "B"], 1),
        ("/A//B", ["C", "A", "B"], 0),
        ("/B/*", ["B", "A"], 1),
        # Test back-tracking; both greedy and non-greedy cases
        ("A//B//C//D", ["A", "B", "C", "B", "D"], 1),
        ("A//B/D", ["A", "B", "C", "B", "D"], 1),

        # Clark namespace tests
        ("{http://x.com}A", ["{http://x.com}A"], 1),
        ("{http://x.org}A", ["{http://x.com}A"], 0),
        ("{http://x.org}A", ["{http://x.com}B", "{http://x.org}A"], 1),
        ("*", ["{http://x.com}A"], 1),
        ("{http://x.com}*", ["{http://x.com}A"], 1),
        ("{http://x.com}*", ["{http://x.org}A"], 0),
    )

    namespace_cases = (
        # various namespace checks
        ("xml:A", ["{http://www.w3.org/XML/1998/namespace}A"], 1),
        ("xml:A", ["{http://www.w3.org/XML/1998/namespace2}A"], 0),
        ("xml:A", ["{http://www.w3.org/XML/1998/namespace}AA"], 0),
        ("xml:A", ["{http://www.w3.org/XML/1998/namespace}B",
                   "{http://www.w3.org/XML/1998/namespace}A"], 1),
        ("xml:B", ["{http://www.w3.org/XML/1998/namespace}B",
                   "{http://www.w3.org/XML/1998/namespace}A"], 0),

        ("A", ["{}A"], 1),
        ("A", ["A"], 0),

        ("*", ["A"], 0),
        ("*", ["{}A"], 1),
        ("das2:*", ["{http://biodas.org/documents/das2}AAA"], 1),
        ("das2:*", ["{}AAA"], 0),
        ("xml:*/das2:*", ["{http://www.w3.org/XML/1998/namespace}ABC",
                          "{http://biodas.org/documents/das2}ABC"], 1),
        ("das2:*/xml:*", ["{http://www.w3.org/XML/1998/namespace}ABC",
                          "{http://biodas.org/documents/das2}ABC"], 0),
    )

    run_cases(test_path, plain_cases)
    run_cases(test_ns_path, namespace_cases)
549
+
550
def test_filtering():
    """Exercise handler dispatch and path filtering on a small document.

    The first pass records every event delivered to a catch-all handler and
    compares it with the expected sequence.  The second pass registers
    handlers on ``B`` and ``B/B`` that raise if they are called for the
    wrong element.  Raises AssertionError on any mismatch.
    """
    import io
    # Bytes rather than text: lxml's iterparse only accepts file objects
    # that return bytes, and the standard ElementTree accepts both.
    f = io.BytesIO(b"""\
<A><AA>
<B xmlns="http://z/"><C/><spam:D xmlns:spam="http://spam/">eggs</spam:D></B>
<B x='6'>foo<B y='7'>bar</B>baz</B>
</AA></A>""")
    special = object()

    class Capture(object):
        """Handler that records every (event, ele) pair it receives."""
        def __init__(self):
            self.history = []
        def __call__(self, event, ele, state):
            if state is not special:
                raise AssertionError("Did not get expected state")
            self.history.append( (event, ele) )

    capture_filter = IterParseFilter()
    capture_all = Capture()
    capture_filter.on_start_document(capture_all)
    capture_filter.on_start("*", capture_all)
    capture_filter.on_end("*", capture_all)
    capture_filter.on_end_document(capture_all)
    capture_filter.on_start_ns(capture_all)
    capture_filter.on_end_ns(capture_all)

    # Nothing was registered for iteration, so nothing may be yielded.
    for x in capture_filter.parse(f, state=special):
        raise AssertionError("should not yield %r" % (x,))

    expect_history = (
        ("start-document", None),
        ("start", "A"),
        ("start", "AA"),
        ("start-ns", ("", "http://z/")),
        ("start", "{http://z/}B"),
        ("start", "{http://z/}C"),
        ("end", "{http://z/}C"),
        ("start-ns", ("spam", "http://spam/")),
        ("start", "{http://spam/}D"),
        ("end", "{http://spam/}D"),
        ("end-ns", None),
        ("end", "{http://z/}B"),
        ("end-ns", None),
        ("start", "B"),
        ("start", "B"),
        ("end", "B"),
        ("end", "B"),
        ("end", "AA"),
        ("end","A"),
        ("end-document", None),
        )

    for (got, expect) in zip(capture_all.history, expect_history):
        event, ele = got
        # Namespace events carry a tuple or None instead of an element.
        tag = getattr(ele, "tag", ele)
        if (event, tag) != expect:
            raise AssertionError("Expected %r Got %r" % (expect, (event, tag)))
    if len(capture_all.history) != len(expect_history):
        raise AssertionError("Length mismatch")

    f.seek(0)
    tag_filter = IterParseFilter()
    def must_match_B(event, ele, state):
        if ele.tag != "B":
            raise AssertionError("%r is not B" % (ele.tag,))
    def must_match_B_y7(event, ele, state):
        if ele.tag != "B":
            raise AssertionError("%r is not B" % (ele.tag,))
        if ele.attrib["y"] != "7":
            raise AssertionError("%r is not the correct B" % (ele.tag,))

    tag_filter.on_start("B", must_match_B)
    tag_filter.on_start("B/B", must_match_B_y7)

    # Actually run the second filter; the handlers raise on a wrong match.
    for x in tag_filter.parse(f):
        raise AssertionError("should not yield %r" % (x,))
624
+
625
+
626
def test_parse():
    """Smoke-test both parsing styles against a local iTunes library file.

    The test is skipped (with a printed notice) when the hard-coded file
    does not exist.  Otherwise it prints one line per track, first using an
    end handler and then using the iterator interface.
    """
    import os
    filename = "/Users/dalke/Music/iTunes/iTunes Music Library.xml"
    if not os.path.exists(filename):
        print ("Cannot find %r: skipping test" % (filename,))
        return

    def read_pairs(ele):
        # The children of *ele* alternate key element / value element;
        # fold them into a dict of key text -> value text.
        d = {}
        children = iter(ele)
        for child in children:
            key = child.text
            value = next(children).text
            d[key] = value
        return d

    # Work through callbacks
    ef = IterParseFilter()
    def print_info(event, ele, state):
        d = read_pairs(ele)
        print ("%r is by %r" % (d["Name"], d.get("Artist", "<unknown>")))
        ele.clear()

    ef.on_end("/plist/dict/dict/dict", print_info)
    # Binary mode works with both lxml and the standard ElementTree.
    with open(filename, "rb") as infile:
        ef.handler_parse(infile)

    # Work through iterators
    ef = IterParseFilter()
    ef.iter_end("/plist/dict/dict/dict")
    with open(filename, "rb") as infile:
        for (event, ele) in ef.iterparse(infile):
            d = read_pairs(ele)
            print ("%r is a %r song" % (d["Name"], d.get("Genre", "<unknown>")))
            ele.clear()
660
+
661
+
662
def test():
    """Run the module's self-tests one after another."""
    for check in (test_syntax, test_filtering, test_parse):
        check()
666
+
667
# Running the module as a script executes the self-tests; the final line is
# reached only if none of them raised.
if __name__ == "__main__":
    test()
    print ("All tests passed.")