tidy-ext 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/LICENSE +50 -0
- data/README +12 -0
- data/Rakefile +60 -0
- data/VERSION +1 -0
- data/ext/tidy/access.c +3310 -0
- data/ext/tidy/access.h +279 -0
- data/ext/tidy/alloc.c +107 -0
- data/ext/tidy/attrask.c +209 -0
- data/ext/tidy/attrdict.c +2398 -0
- data/ext/tidy/attrdict.h +122 -0
- data/ext/tidy/attrget.c +213 -0
- data/ext/tidy/attrs.c +1911 -0
- data/ext/tidy/attrs.h +374 -0
- data/ext/tidy/buffio.c +232 -0
- data/ext/tidy/buffio.h +118 -0
- data/ext/tidy/charsets.c +1032 -0
- data/ext/tidy/charsets.h +14 -0
- data/ext/tidy/clean.c +2674 -0
- data/ext/tidy/clean.h +87 -0
- data/ext/tidy/config.c +1746 -0
- data/ext/tidy/config.h +153 -0
- data/ext/tidy/entities.c +419 -0
- data/ext/tidy/entities.h +24 -0
- data/ext/tidy/extconf.rb +5 -0
- data/ext/tidy/fileio.c +106 -0
- data/ext/tidy/fileio.h +46 -0
- data/ext/tidy/forward.h +69 -0
- data/ext/tidy/iconvtc.c +105 -0
- data/ext/tidy/iconvtc.h +15 -0
- data/ext/tidy/istack.c +373 -0
- data/ext/tidy/lexer.c +3825 -0
- data/ext/tidy/lexer.h +617 -0
- data/ext/tidy/localize.c +1882 -0
- data/ext/tidy/mappedio.c +329 -0
- data/ext/tidy/mappedio.h +16 -0
- data/ext/tidy/message.h +207 -0
- data/ext/tidy/parser.c +4408 -0
- data/ext/tidy/parser.h +76 -0
- data/ext/tidy/platform.h +636 -0
- data/ext/tidy/pprint.c +2276 -0
- data/ext/tidy/pprint.h +93 -0
- data/ext/tidy/ruby-tidy.c +195 -0
- data/ext/tidy/streamio.c +1407 -0
- data/ext/tidy/streamio.h +222 -0
- data/ext/tidy/tagask.c +286 -0
- data/ext/tidy/tags.c +955 -0
- data/ext/tidy/tags.h +235 -0
- data/ext/tidy/tidy-int.h +129 -0
- data/ext/tidy/tidy.h +1097 -0
- data/ext/tidy/tidyenum.h +622 -0
- data/ext/tidy/tidylib.c +1751 -0
- data/ext/tidy/tmbstr.c +306 -0
- data/ext/tidy/tmbstr.h +92 -0
- data/ext/tidy/utf8.c +539 -0
- data/ext/tidy/utf8.h +52 -0
- data/ext/tidy/version.h +14 -0
- data/ext/tidy/win32tc.c +795 -0
- data/ext/tidy/win32tc.h +19 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/tidy/compat_spec.rb +44 -0
- data/spec/tidy/remote_uri_spec.rb +14 -0
- data/spec/tidy/test1.html +5 -0
- data/spec/tidy/tidy_spec.rb +34 -0
- metadata +125 -0
data/ext/tidy/fileio.c
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
/* fileio.c -- does standard I/O
|
2
|
+
|
3
|
+
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
4
|
+
See tidy.h for the copyright notice.
|
5
|
+
|
6
|
+
CVS Info :
|
7
|
+
|
8
|
+
$Author: arnaud02 $
|
9
|
+
$Date: 2007/05/30 16:47:31 $
|
10
|
+
$Revision: 1.17 $
|
11
|
+
|
12
|
+
Default implementations of Tidy input sources
|
13
|
+
and output sinks based on standard C FILE*.
|
14
|
+
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include <stdio.h>
|
18
|
+
|
19
|
+
#include "forward.h"
|
20
|
+
#include "fileio.h"
|
21
|
+
#include "tidy.h"
|
22
|
+
|
23
|
+
typedef struct _fp_input_source
|
24
|
+
{
|
25
|
+
FILE* fp;
|
26
|
+
TidyBuffer unget;
|
27
|
+
} FileSource;
|
28
|
+
|
29
|
+
/* getByte callback: hand out a pushed-back byte if one is pending,
   otherwise read the next byte from the FILE*.  The result is whatever
   tidyBufPopByte() or fgetc() returned. */
static int TIDY_CALL filesrc_getByte( void* sourceData )
{
    FileSource* src = (FileSource*) sourceData;

    if ( src->unget.size > 0 )
        return tidyBufPopByte( &src->unget );

    return fgetc( src->fp );
}
|
39
|
+
|
40
|
+
/* eof callback: the source is at end only when no pushed-back bytes
   remain AND the FILE* reports end-of-file.  feof() is not consulted
   while the unget buffer still holds data. */
static Bool TIDY_CALL filesrc_eof( void* sourceData )
{
    FileSource* src = (FileSource*) sourceData;

    return ( src->unget.size == 0 ) && ( feof( src->fp ) != 0 );
}
|
48
|
+
|
49
|
+
/* ungetByte callback: store bv in the source's unget buffer so that a
   later getByte returns it before touching the FILE*. */
static void TIDY_CALL filesrc_ungetByte( void* sourceData, byte bv )
{
    FileSource* src = (FileSource*) sourceData;

    tidyBufPutByte( &src->unget, bv );
}
|
54
|
+
|
55
|
+
#if SUPPORT_POSIX_MAPPED_FILES
|
56
|
+
#define initFileSource initStdIOFileSource
|
57
|
+
#define freeFileSource freeStdIOFileSource
|
58
|
+
#endif
|
59
|
+
/* Allocate a zeroed FileSource with the given allocator, bind it to fp,
   and wire the getByte/eof/ungetByte callbacks of inp to it.
   Returns 0 on success, -1 if the allocation fails (inp is then left
   untouched). */
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    FileSource* src = (FileSource*) TidyAlloc( allocator, sizeof(FileSource) );

    if ( src == NULL )
        return -1;

    TidyClearMemory( src, sizeof(FileSource) );
    src->fp = fp;
    /* remember the allocator: it is also the one used to free src later */
    src->unget.allocator = allocator;

    inp->sourceData = src;
    inp->getByte    = filesrc_getByte;
    inp->ungetByte  = filesrc_ungetByte;
    inp->eof        = filesrc_eof;

    return 0;
}
|
77
|
+
|
78
|
+
/* Release the FileSource attached to inp.
   If closeIt is set and a FILE* is present it is closed first; then the
   unget buffer is freed and finally the FileSource itself, using the
   allocator stored in its unget buffer.
   Does nothing when inp->sourceData is NULL (previously this case was
   checked for the fclose() but then dereferenced unconditionally). */
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    FileSource* fin = (FileSource*) inp->sourceData;

    if ( !fin )
        return;

    if ( closeIt && fin->fp )
        fclose( fin->fp );

    tidyBufFree( &fin->unget );
    TidyFree( fin->unget.allocator, fin );
}
|
86
|
+
|
87
|
+
/* putByte callback for FILE*-based sinks: sinkData is the FILE* itself.
   The result of fputc() is not checked. */
void TIDY_CALL TY_(filesink_putByte)( void* sinkData, byte bv )
{
    fputc( bv, (FILE*) sinkData );
}
|
92
|
+
|
93
|
+
/* Point an output sink at fp: the FILE* is stored directly as the sink
   data and bytes are written through filesink_putByte. */
void TY_(initFileSink)( TidyOutputSink* outp, FILE* fp )
{
    outp->sinkData = fp;
    outp->putByte  = TY_(filesink_putByte);
}
|
98
|
+
|
99
|
+
/*
|
100
|
+
* local variables:
|
101
|
+
* mode: c
|
102
|
+
* indent-tabs-mode: nil
|
103
|
+
* c-basic-offset: 4
|
104
|
+
* eval: (c-set-offset 'substatement-open 0)
|
105
|
+
* end:
|
106
|
+
*/
|
data/ext/tidy/fileio.h
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#ifndef __FILEIO_H__
|
2
|
+
#define __FILEIO_H__
|
3
|
+
|
4
|
+
/** @file fileio.h - does standard C I/O
|
5
|
+
|
6
|
+
Implementation of a FILE* based TidyInputSource and
|
7
|
+
TidyOutputSink.
|
8
|
+
|
9
|
+
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
10
|
+
See tidy.h for the copyright notice.
|
11
|
+
|
12
|
+
CVS Info:
|
13
|
+
$Author: arnaud02 $
|
14
|
+
$Date: 2007/05/30 16:47:31 $
|
15
|
+
$Revision: 1.8 $
|
16
|
+
*/
|
17
|
+
|
18
|
+
#include "buffio.h"
|
19
|
+
#ifdef __cplusplus
|
20
|
+
extern "C" {
|
21
|
+
#endif
|
22
|
+
|
23
|
+
/** Allocate and initialize file input source */
|
24
|
+
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp );
|
25
|
+
|
26
|
+
/** Free file input source */
|
27
|
+
void TY_(freeFileSource)( TidyInputSource* source, Bool closeIt );
|
28
|
+
|
29
|
+
#if SUPPORT_POSIX_MAPPED_FILES
|
30
|
+
/** Allocate and initialize file input source using Standard C I/O */
|
31
|
+
int TY_(initStdIOFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp );
|
32
|
+
|
33
|
+
/** Free file input source using Standard C I/O */
|
34
|
+
void TY_(freeStdIOFileSource)( TidyInputSource* source, Bool closeIt );
|
35
|
+
#endif
|
36
|
+
|
37
|
+
/** Initialize file output sink */
|
38
|
+
void TY_(initFileSink)( TidyOutputSink* sink, FILE* fp );
|
39
|
+
|
40
|
+
/* Needed for internal declarations */
|
41
|
+
void TIDY_CALL TY_(filesink_putByte)( void* sinkData, byte bv );
|
42
|
+
|
43
|
+
#ifdef __cplusplus
|
44
|
+
}
|
45
|
+
#endif
|
46
|
+
#endif /* __FILEIO_H__ */
|
data/ext/tidy/forward.h
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
#ifndef __FORWARD_H__
|
2
|
+
#define __FORWARD_H__
|
3
|
+
|
4
|
+
/* forward.h -- Forward declarations for major Tidy structures
|
5
|
+
|
6
|
+
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
7
|
+
See tidy.h for the copyright notice.
|
8
|
+
|
9
|
+
CVS Info :
|
10
|
+
|
11
|
+
$Author: arnaud02 $
|
12
|
+
$Date: 2007/02/11 09:45:52 $
|
13
|
+
$Revision: 1.7 $
|
14
|
+
|
15
|
+
Avoids many include file circular dependencies.
|
16
|
+
|
17
|
+
Try to keep this file down to the minimum to avoid
|
18
|
+
cross-talk between modules.
|
19
|
+
|
20
|
+
Header files include this file. C files include tidy-int.h.
|
21
|
+
|
22
|
+
*/
|
23
|
+
|
24
|
+
#include "platform.h"
|
25
|
+
#include "tidy.h"
|
26
|
+
|
27
|
+
/* Internal symbols are prefixed to avoid clashes with other libraries */
|
28
|
+
#define TYDYAPPEND(str1,str2) str1##str2
|
29
|
+
#define TY_(str) TYDYAPPEND(prvTidy,str)
|
30
|
+
|
31
|
+
struct _StreamIn;
|
32
|
+
typedef struct _StreamIn StreamIn;
|
33
|
+
|
34
|
+
struct _StreamOut;
|
35
|
+
typedef struct _StreamOut StreamOut;
|
36
|
+
|
37
|
+
struct _TidyDocImpl;
|
38
|
+
typedef struct _TidyDocImpl TidyDocImpl;
|
39
|
+
|
40
|
+
|
41
|
+
struct _Dict;
|
42
|
+
typedef struct _Dict Dict;
|
43
|
+
|
44
|
+
struct _Attribute;
|
45
|
+
typedef struct _Attribute Attribute;
|
46
|
+
|
47
|
+
struct _AttVal;
|
48
|
+
typedef struct _AttVal AttVal;
|
49
|
+
|
50
|
+
struct _Node;
|
51
|
+
typedef struct _Node Node;
|
52
|
+
|
53
|
+
struct _IStack;
|
54
|
+
typedef struct _IStack IStack;
|
55
|
+
|
56
|
+
struct _Lexer;
|
57
|
+
typedef struct _Lexer Lexer;
|
58
|
+
|
59
|
+
extern TidyAllocator TY_(g_default_allocator);
|
60
|
+
|
61
|
+
/** Wrappers for easy memory allocation using an allocator */
|
62
|
+
#define TidyAlloc(allocator, size) ((allocator)->vtbl->alloc((allocator), (size)))
|
63
|
+
#define TidyRealloc(allocator, block, size) ((allocator)->vtbl->realloc((allocator), (block), (size)))
|
64
|
+
#define TidyFree(allocator, block) ((allocator)->vtbl->free((allocator), (block)))
|
65
|
+
#define TidyPanic(allocator, msg) ((allocator)->vtbl->panic((allocator), (msg)))
|
66
|
+
#define TidyClearMemory(block, size) memset((block), 0, (size))
|
67
|
+
|
68
|
+
|
69
|
+
#endif /* __FORWARD_H__ */
|
data/ext/tidy/iconvtc.c
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
/* iconvtc.c -- Interface to iconv transcoding routines
|
2
|
+
|
3
|
+
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
|
4
|
+
See tidy.h for the copyright notice.
|
5
|
+
|
6
|
+
$Id: iconvtc.c,v 1.2 2008/08/09 11:55:27 hoehrmann Exp $
|
7
|
+
*/
|
8
|
+
|
9
|
+
#include "tidy.h"
|
10
|
+
#include "forward.h"
|
11
|
+
#include "streamio.h"
|
12
|
+
|
13
|
+
#ifdef TIDY_ICONV_SUPPORT
|
14
|
+
|
15
|
+
#include <iconv.h>
|
16
|
+
|
17
|
+
/* maximum number of bytes for a single character */
|
18
|
+
#define TC_INBUFSIZE 16
|
19
|
+
|
20
|
+
/* maximum number of characters per byte sequence */
|
21
|
+
#define TC_OUTBUFSIZE 16
|
22
|
+
|
23
|
+
Bool IconvInitInputTranscoder(void)
|
24
|
+
{
|
25
|
+
return no;
|
26
|
+
}
|
27
|
+
|
28
|
+
/* Placeholder: there is nothing to tear down. */
void IconvUninitInputTranscoder(void)
{
}
|
32
|
+
|
33
|
+
/* Decode one character using the iconv descriptor stored in in->iconvptr.

   firstByte has already been read by the caller; further bytes are
   pulled from in->source one at a time until iconv() accepts the
   sequence.  The converted output is interpreted as UTF-32LE octets.

   Returns the code point and stores the number of input bytes used in
   *bytesRead.  Returns EndOfStream when the source runs dry in the
   middle of a sequence (*bytesRead is set as well) or when
   TC_INBUFSIZE bytes still do not form a character (*bytesRead is left
   untouched in that case). */
int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
{
    iconv_t cd;
    TidyInputSource * source;
    char inbuf[TC_INBUFSIZE] = { 0 };
    char outbuf[TC_OUTBUFSIZE] = { 0 };
    size_t inbufsize = 0;

    assert( in != NULL );
    assert( bytesRead != NULL );
    assert( in->iconvptr != 0 );

    cd = (iconv_t)in->iconvptr;
    source = &in->source;

    inbuf[inbufsize++] = (char)firstByte;

    while(inbufsize < TC_INBUFSIZE)
    {
        char * outbufptr = (char*)outbuf;
        char * inbufptr  = (char*)inbuf;
        size_t readNow = inbufsize;
        size_t writeNow = TC_OUTBUFSIZE;
        size_t result = 0;
        int iconv_errno = 0;
        int nextByte = EndOfStream;

        result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);
        iconv_errno = errno; /* capture before any later call can overwrite it */

        if (result != (size_t)(-1))
        {
            /* Create the code point from UTF-32LE octets.  The most
               significant octet is shifted by 24 (not 32, which is
               undefined for a 32-bit int); unsigned arithmetic keeps
               every shift well defined. */
            uint c = (uint)(unsigned char)outbuf[0]
                   | ((uint)(unsigned char)outbuf[1] << 8)
                   | ((uint)(unsigned char)outbuf[2] << 16)
                   | ((uint)(unsigned char)outbuf[3] << 24);

            /* set number of read bytes */
            *bytesRead = inbufsize;

            return (int)c;
        }

        assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */
        assert( iconv_errno != E2BIG );  /* not enough memory */
        assert( iconv_errno == EINVAL ); /* incomplete sequence */

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */

            *bytesRead = inbufsize;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (char)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */
    return EndOfStream;
}
|
104
|
+
|
105
|
+
#endif /* TIDY_ICONV_SUPPORT */
|
data/ext/tidy/iconvtc.h
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#ifndef __ICONVTC_H__
|
2
|
+
#define __ICONVTC_H__
|
3
|
+
#ifdef TIDY_ICONV_SUPPORT
|
4
|
+
|
5
|
+
/* iconvtc.h -- Interface to iconv transcoding routines
|
6
|
+
|
7
|
+
(c) 1998-2003 (W3C) MIT, ERCIM, Keio University
|
8
|
+
See tidy.h for the copyright notice.
|
9
|
+
|
10
|
+
$Id: iconvtc.h,v 1.1 2003/04/28 22:59:41 hoehrmann Exp $
|
11
|
+
*/
|
12
|
+
|
13
|
+
|
14
|
+
#endif /* TIDY_ICONV_SUPPORT */
|
15
|
+
#endif /* __ICONVTC_H__ */
|
data/ext/tidy/istack.c
ADDED
@@ -0,0 +1,373 @@
|
|
1
|
+
/* istack.c -- inline stack for compatibility with Mosaic
|
2
|
+
|
3
|
+
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
4
|
+
See tidy.h for the copyright notice.
|
5
|
+
|
6
|
+
CVS Info :
|
7
|
+
|
8
|
+
$Author: arnaud02 $
|
9
|
+
$Date: 2006/12/29 16:31:08 $
|
10
|
+
$Revision: 1.21 $
|
11
|
+
|
12
|
+
*/
|
13
|
+
|
14
|
+
#include "tidy-int.h"
|
15
|
+
#include "lexer.h"
|
16
|
+
#include "attrs.h"
|
17
|
+
#include "streamio.h"
|
18
|
+
#include "tmbstr.h"
|
19
|
+
|
20
|
+
/* duplicate attributes */
|
21
|
+
AttVal *TY_(DupAttrs)( TidyDocImpl* doc, AttVal *attrs)
|
22
|
+
{
|
23
|
+
AttVal *newattrs;
|
24
|
+
|
25
|
+
if (attrs == NULL)
|
26
|
+
return attrs;
|
27
|
+
|
28
|
+
newattrs = TY_(NewAttribute)(doc);
|
29
|
+
*newattrs = *attrs;
|
30
|
+
newattrs->next = TY_(DupAttrs)( doc, attrs->next );
|
31
|
+
newattrs->attribute = TY_(tmbstrdup)(doc->allocator, attrs->attribute);
|
32
|
+
newattrs->value = TY_(tmbstrdup)(doc->allocator, attrs->value);
|
33
|
+
newattrs->dict = TY_(FindAttribute)(doc, newattrs);
|
34
|
+
newattrs->asp = attrs->asp ? TY_(CloneNode)(doc, attrs->asp) : NULL;
|
35
|
+
newattrs->php = attrs->php ? TY_(CloneNode)(doc, attrs->php) : NULL;
|
36
|
+
return newattrs;
|
37
|
+
}
|
38
|
+
|
39
|
+
/* A node may go on the inline stack only if it has a tag whose content
   model is CM_INLINE and not CM_OBJECT. */
static Bool IsNodePushable( Node *node )
{
    if (node->tag == NULL)
        return no;

    if ((node->tag->model & CM_INLINE) && !(node->tag->model & CM_OBJECT))
        return yes;

    return no;
}
|
52
|
+
|
53
|
+
/*
|
54
|
+
push a copy of an inline node onto stack
|
55
|
+
but don't push if implicit or OBJECT or APPLET
|
56
|
+
(implicit tags are ones generated from the istack)
|
57
|
+
|
58
|
+
One issue arises with pushing inlines when
|
59
|
+
the tag is already pushed. For instance:
|
60
|
+
|
61
|
+
<p><em>text
|
62
|
+
<p><em>more text
|
63
|
+
|
64
|
+
Shouldn't be mapped to
|
65
|
+
|
66
|
+
<p><em>text</em></p>
|
67
|
+
<p><em><em>more text</em></em>
|
68
|
+
*/
|
69
|
+
/* Push a copy of node (tag, element name, attributes) onto the lexer's
   inline stack.  Implicit nodes, non-pushable nodes, and -- except for
   FONT -- tags that are already on the stack are ignored. */
void TY_(PushInline)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;
    IStack *slot;

    if ( node->implicit || !IsNodePushable(node) )
        return;

    if ( !nodeIsFONT(node) && TY_(IsPushed)(doc, node) )
        return;

    /* grow the stack when full: the capacity doubles each time, and an
       empty stack starts from 6, so the first allocation holds 12 */
    if (lexer->istacksize + 1 > lexer->istacklength)
    {
        if (lexer->istacklength == 0)
            lexer->istacklength = 6;  /* this is perhaps excessive */

        lexer->istacklength *= 2;
        lexer->istack = (IStack *)TidyDocRealloc(doc, lexer->istack,
                            sizeof(IStack)*(lexer->istacklength));
    }

    slot = lexer->istack + lexer->istacksize;
    slot->tag        = node->tag;
    slot->element    = TY_(tmbstrdup)(doc->allocator, node->element);
    slot->attributes = TY_(DupAttrs)( doc, node->attributes );

    lexer->istacksize++;
}
|
101
|
+
|
102
|
+
/* Remove the top entry of the inline stack, freeing its attribute list
   and element name.  The caller must ensure the stack is not empty. */
static void PopIStack( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    IStack *top;

    lexer->istacksize -= 1;
    top = lexer->istack + lexer->istacksize;

    while (top->attributes != NULL)
    {
        AttVal *head = top->attributes;
        top->attributes = head->next;
        TY_(FreeAttribute)( doc, head );
    }

    TidyDocFree(doc, top->element);
}
|
119
|
+
|
120
|
+
/* Pop entries off the inline stack until one with tag id tid has been
   removed, or the stack is empty. */
static void PopIStackUntil( TidyDocImpl* doc, TidyTagId tid )
{
    Lexer* lexer = doc->lexer;

    while (lexer->istacksize > 0)
    {
        PopIStack( doc );

        /* after the pop, istacksize indexes the entry just removed */
        if ( lexer->istack[lexer->istacksize].tag->id == tid )
            return;
    }
}
|
133
|
+
|
134
|
+
/* pop inline stack */
|
135
|
+
void TY_(PopInline)( TidyDocImpl* doc, Node *node )
|
136
|
+
{
|
137
|
+
Lexer* lexer = doc->lexer;
|
138
|
+
|
139
|
+
if (node)
|
140
|
+
{
|
141
|
+
if ( !IsNodePushable(node) )
|
142
|
+
return;
|
143
|
+
|
144
|
+
/* if node is </a> then pop until we find an <a> */
|
145
|
+
if ( nodeIsA(node) )
|
146
|
+
{
|
147
|
+
PopIStackUntil( doc, TidyTag_A );
|
148
|
+
return;
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
if (lexer->istacksize > 0)
|
153
|
+
{
|
154
|
+
PopIStack( doc );
|
155
|
+
|
156
|
+
/* #427822 - fix by Randy Waki 7 Aug 00 */
|
157
|
+
if (lexer->insert >= lexer->istack + lexer->istacksize)
|
158
|
+
lexer->insert = NULL;
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
/* Report whether any entry on the inline stack carries the same tag as
   node.  The stack is scanned from the top down. */
Bool TY_(IsPushed)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;
    int idx = lexer->istacksize - 1;

    while (idx >= 0)
    {
        if (lexer->istack[idx].tag == node->tag)
            return yes;
        --idx;
    }

    return no;
}
|
175
|
+
|
176
|
+
/*
|
177
|
+
Test whether the last element on the stack has the same type as "node".
|
178
|
+
*/
|
179
|
+
/* Returns yes when the top entry of the inline stack has node's tag.
   If element is given and is not pushable the answer is always no. */
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node )
{
    Lexer* lexer = doc->lexer;

    if ( element && !IsNodePushable(element) )
        return no;

    if ( lexer->istacksize > 0
         && lexer->istack[lexer->istacksize - 1].tag == node->tag )
        return yes;

    return no;
}
|
194
|
+
|
195
|
+
/*
|
196
|
+
This has the effect of inserting "missing" inline
|
197
|
+
elements around the contents of blocklevel elements
|
198
|
+
such as P, TD, TH, DIV, PRE etc. This procedure is
|
199
|
+
called at the start of ParseBlock when the inline
|
200
|
+
stack is not empty, as will be the case in:
|
201
|
+
|
202
|
+
<i><h1>italic heading</h1></i>
|
203
|
+
|
204
|
+
which is then treated as equivalent to
|
205
|
+
|
206
|
+
<h1><i>italic heading</i></h1>
|
207
|
+
|
208
|
+
This is implemented by setting the lexer into a mode
|
209
|
+
where it gets tokens from the inline stack rather than
|
210
|
+
from the input stream.
|
211
|
+
*/
|
212
|
+
/* If there are entries above istackbase, arm the lexer to replay them:
   insert is set to the first such entry and inode to node.
   Returns istacksize - istackbase (the lexer is left untouched when
   that is not positive). */
int TY_(InlineDup)( TidyDocImpl* doc, Node* node )
{
    Lexer* lexer = doc->lexer;
    int pending = lexer->istacksize - lexer->istackbase;

    if (pending > 0)
    {
        lexer->insert = &(lexer->istack[lexer->istackbase]);
        lexer->inode = node;
    }

    return pending;
}
|
225
|
+
|
226
|
+
/*
|
227
|
+
defer duplicates when entering a table or other
|
228
|
+
element where the inlines shouldn't be duplicated
|
229
|
+
*/
|
230
|
+
/* Cancel any pending replay of the inline stack by clearing the
   lexer's insert and inode pointers. */
void TY_(DeferDup)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;

    lexer->insert = NULL;
    lexer->inode = NULL;
}
|
235
|
+
|
236
|
+
/* Produce the next token while the lexer is replaying the inline stack.
   When no stack entry is pending (insert == NULL) the deferred node
   inode is returned and cleared.  Otherwise a new implicit start tag
   is built from the entry at insert, and insert advances to the next
   entry, or becomes NULL once the end of the stack is reached. */
Node *TY_(InsertedToken)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    Node *token;
    IStack *entry;
    uint next;

    /* this will only be NULL if inode != NULL */
    if (lexer->insert == NULL)
    {
        token = lexer->inode;
        lexer->inode = NULL;
        return token;
    }

    /* If this is the "latest" node then update the position,
       otherwise use the current values */
    if (lexer->inode == NULL)
    {
        lexer->lines = doc->docIn->curline;
        lexer->columns = doc->docIn->curcol;
    }

    token = TY_(NewNode)(doc->allocator, lexer);
    token->type = StartTag;
    token->implicit = yes;
    token->start = lexer->txtstart;
    /* #431734 [JTidy bug #226261 (was 126261)] - fix by Gary Peskin 20 Dec 00 */
    token->end = lexer->txtend; /* was : lexer->txtstart; */
    entry = lexer->insert;

#if 0 && defined(_DEBUG)
    if ( lexer->istacksize == 0 )
        fprintf( stderr, "0-size istack!\n" );
#endif

    token->element = TY_(tmbstrdup)(doc->allocator, entry->element);
    token->tag = entry->tag;
    token->attributes = TY_(DupAttrs)( doc, entry->attributes );

    /* index of the stack entry after the one just used */
    next = (uint)(lexer->insert - &(lexer->istack[0])) + 1;

    /* advance, or recover state if we have reached the end */
    lexer->insert = (next < lexer->istacksize) ? &(lexer->istack[next]) : NULL;

    return token;
}
|
290
|
+
|
291
|
+
|
292
|
+
/*
|
293
|
+
We have two CM_INLINE elements pushed ... the first is closing,
|
294
|
+
but, like the browser, the second should be retained ...
|
295
|
+
Like <b>bold <i>bold and italics</b> italics only</i>
|
296
|
+
This function switches the tag positions on the stack,
|
297
|
+
returning 'yes' if both were found in the expected order.
|
298
|
+
*/
|
299
|
+
/* Swap the stack entries for element's tag and node's tag.
   Scanning down from index (istacksize - istackbase - 1), the first
   entry with element's tag is located; the search then continues
   below it for an entry with node's tag.  If both are found in that
   order the two entries are exchanged and yes is returned; in every
   other case the stack is left unchanged and the result is no. */
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node )
{
    Lexer* lexer = doc->lexer;
    int upper, lower;

    if ( !lexer
         || !element || !element->tag
         || !node || !node->tag
         || !TY_(IsPushed)( doc, element )
         || !TY_(IsPushed)( doc, node )
         || !((lexer->istacksize - lexer->istackbase) >= 2) )
        return no;

    /* we have a chance of succeeding ... */
    upper = (lexer->istacksize - lexer->istackbase - 1);
    while (upper >= 0 && lexer->istack[upper].tag != element->tag)
        --upper;

    if (upper < 0)
        return no;

    /* found the element tag - now look further down for the node tag */
    for (lower = upper - 1; lower >= 0; --lower)
    {
        if (lexer->istack[lower].tag == node->tag)
        {
            /* perform the swap */
            IStack saved = lexer->istack[lower];
            lexer->istack[lower] = lexer->istack[upper];
            lexer->istack[upper] = saved;
            return yes;
        }
    }

    return no;
}
|
340
|
+
|
341
|
+
/*
|
342
|
+
We want to push a specific element on the stack,
|
343
|
+
but it may not be the last element, which InlineDup()
|
344
|
+
would handle. Return yes, if found and inserted.
|
345
|
+
*/
|
346
|
+
/* Arm the lexer to replay the inline stack starting at the entry whose
   tag matches element.  Indices (istacksize - istackbase - 1) down to
   0 are examined; on the first match insert is set to that entry,
   inode to node, and yes is returned.  Returns no when element has no
   tag, nothing lies above istackbase, or no entry matches. */
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element )
{
    Lexer* lexer = doc->lexer;
    int count, idx;

    if ( element == NULL || element->tag == NULL )
        return no;

    count = lexer->istacksize - lexer->istackbase;
    if ( count <= 0 )
        return no;

    for ( idx = count - 1; idx >= 0; --idx )
    {
        if (lexer->istack[idx].tag != element->tag)
            continue;

        /* found our element tag - insert it */
        lexer->insert = &(lexer->istack[idx]);
        lexer->inode = node;
        return yes;
    }

    return no;
}
|
365
|
+
|
366
|
+
/*
|
367
|
+
* local variables:
|
368
|
+
* mode: c
|
369
|
+
* indent-tabs-mode: nil
|
370
|
+
* c-basic-offset: 4
|
371
|
+
* eval: (c-set-offset 'substatement-open 0)
|
372
|
+
* end:
|
373
|
+
*/
|